import functools
import itertools

import numpy as np
import PIL.Image
import pytest
import torch.testing
import torchvision.ops
import torchvision.transforms.v2.functional as F
from common_utils import (
    ArgsKwargs,
    combinations_grid,
    DEFAULT_PORTRAIT_SPATIAL_SIZE,
    get_num_channels,
    ImageLoader,
    InfoBase,
    make_bounding_box_loader,
    make_bounding_box_loaders,
    make_detection_mask_loader,
    make_image_loader,
    make_image_loaders,
    make_image_loaders_for_interpolation,
    make_mask_loaders,
    make_video_loader,
    make_video_loaders,
    mark_framework_limitation,
    TestMark,
)
from torchvision import datapoints
from torchvision.transforms._functional_tensor import _max_value as get_max_value, _parse_pad_padding

__all__ = ["KernelInfo", "KERNEL_INFOS"]


class KernelInfo(InfoBase):
    def __init__(
        self,
        kernel,
        *,
        # Defaults to `kernel.__name__`. Should be set if the function is exposed under a different name
        # TODO: This can probably be removed after roll-out since we shouldn't have any aliasing then
        kernel_name=None,
        # Most common tests use these inputs to check the kernel. As such, they should cover all valid code paths, but
        # should not include extensive parameter combinations, to keep the overall test count moderate.
        sample_inputs_fn,
        # This function should mirror the kernel. It should have the same signature as the `kernel` and as such also
        # take tensors as inputs. Any conversion into another object type, e.g. PIL images or numpy arrays, should
        # happen inside the function. It should return a tensor, or more precisely an object that can be compared to
        # a tensor by `assert_close`. If omitted, no reference test will be performed.
        reference_fn=None,
        # These inputs are only used for the reference tests and thus can be comprehensive with regard to the parameter
        # values to be tested. If not specified, `sample_inputs_fn` will be used.
        reference_inputs_fn=None,
        # If true-ish, triggers a test that checks the kernel for consistency between uint8 and float32 inputs with the
        # reference inputs. This is usually used whenever we use a PIL kernel as reference.
        # Can be a callable in which case it will be called with `other_args, kwargs`. It should return the same
        # structure, but with adapted parameters. This is useful in case a parameter value is closely tied to the input
        # dtype.
        float32_vs_uint8=False,
        # Some kernels don't have dispatchers that would handle logging the usage. Thus, the kernel has to do it
        # manually. If set, triggers a test that makes sure this happens.
        logs_usage=False,
        # See InfoBase
        test_marks=None,
        # See InfoBase
        closeness_kwargs=None,
    ):
        super().__init__(id=kernel_name or kernel.__name__, test_marks=test_marks, closeness_kwargs=closeness_kwargs)
        self.kernel = kernel
        self.sample_inputs_fn = sample_inputs_fn
        self.reference_fn = reference_fn
        self.reference_inputs_fn = reference_inputs_fn

        if float32_vs_uint8 and not callable(float32_vs_uint8):
            float32_vs_uint8 = lambda other_args, kwargs: (other_args, kwargs)  # noqa: E731
        self.float32_vs_uint8 = float32_vs_uint8
        self.logs_usage = logs_usage


def pixel_difference_closeness_kwargs(uint8_atol, *, dtype=torch.uint8, mae=False):
    return dict(atol=uint8_atol / 255 * get_max_value(dtype), rtol=0, mae=mae)
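
# For intuition: `get_max_value(torch.uint8)` is 255, so `pixel_difference_closeness_kwargs(2)`
# amounts to dict(atol=2.0, rtol=0, mae=False), while `dtype=torch.float32` rescales the same
# tolerance into the unit value range, i.e. atol = 2 / 255 * 1.0 ≈ 0.0078.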


def cuda_vs_cpu_pixel_difference(atol=1):
    return {
        (("TestKernels", "test_cuda_vs_cpu"), dtype, "cuda"): pixel_difference_closeness_kwargs(atol, dtype=dtype)
        for dtype in [torch.uint8, torch.float32]
    }
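
# Closeness kwargs are keyed by ((test_class, test_name), dtype, device). The helper above thus
# emits one entry per dtype, e.g.
#   (("TestKernels", "test_cuda_vs_cpu"), torch.uint8, "cuda"): dict(atol=1.0, rtol=0, mae=False)
# so the tolerance only applies to that exact test / dtype / device combination.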


def pil_reference_pixel_difference(atol=1, mae=False):
    return {
        (("TestKernels", "test_against_reference"), torch.uint8, "cpu"): pixel_difference_closeness_kwargs(
            atol, mae=mae
        )
    }


def float32_vs_uint8_pixel_difference(atol=1, mae=False):
    return {
        (
            ("TestKernels", "test_float32_vs_uint8"),
            torch.float32,
            "cpu",
        ): pixel_difference_closeness_kwargs(atol, dtype=torch.float32, mae=mae)
    }


def scripted_vs_eager_float64_tolerances(device, atol=1e-6, rtol=1e-6):
    return {
        (("TestKernels", "test_scripted_vs_eager"), torch.float64, device): {"atol": atol, "rtol": rtol, "mae": False},
    }


def pil_reference_wrapper(pil_kernel):
    @functools.wraps(pil_kernel)
    def wrapper(input_tensor, *other_args, **kwargs):
        if input_tensor.dtype != torch.uint8:
            raise pytest.UsageError(f"Can only test uint8 tensor images against PIL, but input is {input_tensor.dtype}")
        if input_tensor.ndim > 3:
            raise pytest.UsageError(
                f"Can only test single tensor images against PIL, but input has shape {input_tensor.shape}"
            )

        input_pil = F.to_image_pil(input_tensor)
        output_pil = pil_kernel(input_pil, *other_args, **kwargs)
        if not isinstance(output_pil, PIL.Image.Image):
            return output_pil

        output_tensor = F.to_image_tensor(output_pil)

        # 2D mask shenanigans
        if output_tensor.ndim == 2 and input_tensor.ndim == 3:
            output_tensor = output_tensor.unsqueeze(0)
        elif output_tensor.ndim == 3 and input_tensor.ndim == 2:
            output_tensor = output_tensor.squeeze(0)

        return output_tensor

    return wrapper


def xfail_jit(reason, *, condition=None):
    return TestMark(("TestKernels", "test_scripted_vs_eager"), pytest.mark.xfail(reason=reason), condition=condition)


def xfail_jit_python_scalar_arg(name, *, reason=None):
    return xfail_jit(
        reason or f"Python scalar int or float for `{name}` is not supported when scripting",
        condition=lambda args_kwargs: isinstance(args_kwargs.kwargs.get(name), (int, float)),
    )
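
# For example, `xfail_jit_python_scalar_arg("padding")` only expects the scripted-vs-eager test to
# fail for sample inputs where `kwargs["padding"]` is a bare Python int or float; sequence-valued
# paddings are unaffected.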


KERNEL_INFOS = []


def get_fills(*, num_channels, dtype):
    yield None

    int_value = get_max_value(dtype)
    float_value = int_value / 2
    yield int_value
    yield float_value

    for vector_type in [list, tuple]:
        yield vector_type([int_value])
        yield vector_type([float_value])

        if num_channels > 1:
            yield vector_type(float_value * c / 10 for c in range(num_channels))
            yield vector_type(int_value if c % 2 == 0 else 0 for c in range(num_channels))
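
# For example, `get_fills(num_channels=3, dtype=torch.uint8)` generates, in order: None, 255,
# 127.5, then the lists [255], [127.5], [0.0, 12.75, 25.5], [255, 0, 255], and finally the same
# four sequences again as tuples.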


def float32_vs_uint8_fill_adapter(other_args, kwargs):
    fill = kwargs.get("fill")
    if fill is None:
        return other_args, kwargs

    if isinstance(fill, (int, float)):
        fill /= 255
    else:
        fill = type(fill)(fill_ / 255 for fill_ in fill)

    return other_args, dict(kwargs, fill=fill)
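
# E.g. a uint8 fill of [255, 0, 255] becomes [1.0, 0.0, 1.0] for the float32 run, mirroring how
# the image values themselves are rescaled from [0, 255] to [0, 1].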


def reference_affine_bounding_boxes_helper(bounding_boxes, *, format, spatial_size, affine_matrix):
    def transform(bbox, affine_matrix_, format_, spatial_size_):
        # Go to float before converting to prevent precision loss in case of CXCYWH -> XYXY and W or H is 1
        in_dtype = bbox.dtype
        if not torch.is_floating_point(bbox):
            bbox = bbox.float()
        bbox_xyxy = F.convert_format_bounding_boxes(
            bbox.as_subclass(torch.Tensor),
            old_format=format_,
            new_format=datapoints.BoundingBoxFormat.XYXY,
            inplace=True,
        )
        points = np.array(
            [
                [bbox_xyxy[0].item(), bbox_xyxy[1].item(), 1.0],
                [bbox_xyxy[2].item(), bbox_xyxy[1].item(), 1.0],
                [bbox_xyxy[0].item(), bbox_xyxy[3].item(), 1.0],
                [bbox_xyxy[2].item(), bbox_xyxy[3].item(), 1.0],
            ]
        )
        transformed_points = np.matmul(points, affine_matrix_.T)
        out_bbox = torch.tensor(
            [
                np.min(transformed_points[:, 0]).item(),
                np.min(transformed_points[:, 1]).item(),
                np.max(transformed_points[:, 0]).item(),
                np.max(transformed_points[:, 1]).item(),
            ],
            dtype=bbox_xyxy.dtype,
        )
        out_bbox = F.convert_format_bounding_boxes(
            out_bbox, old_format=datapoints.BoundingBoxFormat.XYXY, new_format=format_, inplace=True
        )
        # It is important to clamp before casting, especially for the CXCYWH format with dtype=int64
        out_bbox = F.clamp_bounding_boxes(out_bbox, format=format_, spatial_size=spatial_size_)
        out_bbox = out_bbox.to(dtype=in_dtype)
        return out_bbox

    if bounding_boxes.ndim < 2:
        bounding_boxes = [bounding_boxes]

    expected_bboxes = [transform(bbox, affine_matrix, format, spatial_size) for bbox in bounding_boxes]
    if len(expected_bboxes) > 1:
        expected_bboxes = torch.stack(expected_bboxes)
    else:
        expected_bboxes = expected_bboxes[0]

    return expected_bboxes
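
# Sanity check for the helper above: an XYXY box [10, 10, 20, 20] pushed through the translation
# matrix [[1, 0, -3], [0, 1, -4]] (i.e. a crop with left=3, top=4) lands on [7, 6, 17, 16]: every
# corner shifts by (-3, -4) and the min/max reduction reassembles the box.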


def sample_inputs_convert_format_bounding_boxes():
    formats = list(datapoints.BoundingBoxFormat)
    for bounding_boxes_loader, new_format in itertools.product(make_bounding_box_loaders(formats=formats), formats):
        yield ArgsKwargs(bounding_boxes_loader, old_format=bounding_boxes_loader.format, new_format=new_format)


def reference_convert_format_bounding_boxes(bounding_boxes, old_format, new_format):
    return torchvision.ops.box_convert(
        bounding_boxes, in_fmt=old_format.name.lower(), out_fmt=new_format.name.lower()
    ).to(bounding_boxes.dtype)


def reference_inputs_convert_format_bounding_boxes():
    for args_kwargs in sample_inputs_convert_format_bounding_boxes():
        if len(args_kwargs.args[0].shape) == 2:
            yield args_kwargs


KERNEL_INFOS.append(
    KernelInfo(
        F.convert_format_bounding_boxes,
        sample_inputs_fn=sample_inputs_convert_format_bounding_boxes,
        reference_fn=reference_convert_format_bounding_boxes,
        reference_inputs_fn=reference_inputs_convert_format_bounding_boxes,
        logs_usage=True,
        closeness_kwargs={
            (("TestKernels", "test_against_reference"), torch.int64, "cpu"): dict(atol=1, rtol=0),
        },
    ),
)


_CROP_PARAMS = combinations_grid(top=[-8, 0, 9], left=[-8, 0, 9], height=[12, 20], width=[12, 20])


def sample_inputs_crop_image_tensor():
    for image_loader, params in itertools.product(
        make_image_loaders(sizes=[(16, 17)], color_spaces=["RGB"], dtypes=[torch.float32]),
        [
            dict(top=4, left=3, height=7, width=8),
            dict(top=-1, left=3, height=7, width=8),
            dict(top=4, left=-1, height=7, width=8),
            dict(top=4, left=3, height=17, width=8),
            dict(top=4, left=3, height=7, width=18),
        ],
    ):
        yield ArgsKwargs(image_loader, **params)


def reference_inputs_crop_image_tensor():
    for image_loader, params in itertools.product(
        make_image_loaders(extra_dims=[()], dtypes=[torch.uint8]), _CROP_PARAMS
    ):
        yield ArgsKwargs(image_loader, **params)


def sample_inputs_crop_bounding_boxes():
    for bounding_boxes_loader, params in itertools.product(
        make_bounding_box_loaders(), [_CROP_PARAMS[0], _CROP_PARAMS[-1]]
    ):
        yield ArgsKwargs(bounding_boxes_loader, format=bounding_boxes_loader.format, **params)


def sample_inputs_crop_mask():
    for mask_loader in make_mask_loaders(sizes=[(16, 17)], num_categories=[10], num_objects=[5]):
        yield ArgsKwargs(mask_loader, top=4, left=3, height=7, width=8)


def reference_inputs_crop_mask():
    for mask_loader, params in itertools.product(make_mask_loaders(extra_dims=[()], num_objects=[1]), _CROP_PARAMS):
        yield ArgsKwargs(mask_loader, **params)


def sample_inputs_crop_video():
    for video_loader in make_video_loaders(sizes=[(16, 17)], num_frames=[3]):
        yield ArgsKwargs(video_loader, top=4, left=3, height=7, width=8)


def reference_crop_bounding_boxes(bounding_boxes, *, format, top, left, height, width):
    affine_matrix = np.array(
        [
            [1, 0, -left],
            [0, 1, -top],
        ],
        dtype="float64" if bounding_boxes.dtype == torch.float64 else "float32",
    )

    spatial_size = (height, width)
    expected_bboxes = reference_affine_bounding_boxes_helper(
        bounding_boxes, format=format, spatial_size=spatial_size, affine_matrix=affine_matrix
    )
    return expected_bboxes, spatial_size


def reference_inputs_crop_bounding_boxes():
    for bounding_boxes_loader, params in itertools.product(
        make_bounding_box_loaders(extra_dims=((), (4,))), [_CROP_PARAMS[0], _CROP_PARAMS[-1]]
    ):
        yield ArgsKwargs(bounding_boxes_loader, format=bounding_boxes_loader.format, **params)


KERNEL_INFOS.extend(
    [
        KernelInfo(
            F.crop_image_tensor,
            kernel_name="crop_image_tensor",
            sample_inputs_fn=sample_inputs_crop_image_tensor,
            reference_fn=pil_reference_wrapper(F.crop_image_pil),
            reference_inputs_fn=reference_inputs_crop_image_tensor,
            float32_vs_uint8=True,
        ),
        KernelInfo(
            F.crop_bounding_boxes,
            sample_inputs_fn=sample_inputs_crop_bounding_boxes,
            reference_fn=reference_crop_bounding_boxes,
            reference_inputs_fn=reference_inputs_crop_bounding_boxes,
        ),
        KernelInfo(
            F.crop_mask,
            sample_inputs_fn=sample_inputs_crop_mask,
            reference_fn=pil_reference_wrapper(F.crop_image_pil),
            reference_inputs_fn=reference_inputs_crop_mask,
            float32_vs_uint8=True,
        ),
        KernelInfo(
            F.crop_video,
            sample_inputs_fn=sample_inputs_crop_video,
        ),
    ]
)

_RESIZED_CROP_PARAMS = combinations_grid(top=[-8, 9], left=[-8, 9], height=[12], width=[12], size=[(16, 18)])


def sample_inputs_resized_crop_image_tensor():
    for image_loader in make_image_loaders():
        yield ArgsKwargs(image_loader, **_RESIZED_CROP_PARAMS[0])


@pil_reference_wrapper
def reference_resized_crop_image_tensor(*args, **kwargs):
    if not kwargs.pop("antialias", False) and kwargs.get("interpolation", F.InterpolationMode.BILINEAR) in {
        F.InterpolationMode.BILINEAR,
        F.InterpolationMode.BICUBIC,
    }:
        raise pytest.UsageError("Anti-aliasing is always active in PIL")
    return F.resized_crop_image_pil(*args, **kwargs)


def reference_inputs_resized_crop_image_tensor():
    for image_loader, interpolation, params in itertools.product(
        make_image_loaders_for_interpolation(),
        [
            F.InterpolationMode.NEAREST,
            F.InterpolationMode.NEAREST_EXACT,
            F.InterpolationMode.BILINEAR,
            F.InterpolationMode.BICUBIC,
        ],
        _RESIZED_CROP_PARAMS,
    ):
        yield ArgsKwargs(
            image_loader,
            interpolation=interpolation,
            antialias=interpolation
            in {
                F.InterpolationMode.BILINEAR,
                F.InterpolationMode.BICUBIC,
            },
            **params,
        )


def sample_inputs_resized_crop_bounding_boxes():
    for bounding_boxes_loader in make_bounding_box_loaders():
        yield ArgsKwargs(bounding_boxes_loader, format=bounding_boxes_loader.format, **_RESIZED_CROP_PARAMS[0])


def sample_inputs_resized_crop_mask():
    for mask_loader in make_mask_loaders():
        yield ArgsKwargs(mask_loader, **_RESIZED_CROP_PARAMS[0])


def sample_inputs_resized_crop_video():
    for video_loader in make_video_loaders(sizes=[DEFAULT_PORTRAIT_SPATIAL_SIZE], num_frames=[3]):
        yield ArgsKwargs(video_loader, **_RESIZED_CROP_PARAMS[0])


KERNEL_INFOS.extend(
    [
        KernelInfo(
            F.resized_crop_image_tensor,
            sample_inputs_fn=sample_inputs_resized_crop_image_tensor,
            reference_fn=reference_resized_crop_image_tensor,
            reference_inputs_fn=reference_inputs_resized_crop_image_tensor,
            float32_vs_uint8=True,
            closeness_kwargs={
                **cuda_vs_cpu_pixel_difference(),
                **pil_reference_pixel_difference(3, mae=True),
                **float32_vs_uint8_pixel_difference(3, mae=True),
            },
        ),
        KernelInfo(
            F.resized_crop_bounding_boxes,
            sample_inputs_fn=sample_inputs_resized_crop_bounding_boxes,
        ),
        KernelInfo(
            F.resized_crop_mask,
            sample_inputs_fn=sample_inputs_resized_crop_mask,
        ),
        KernelInfo(
            F.resized_crop_video,
            sample_inputs_fn=sample_inputs_resized_crop_video,
            closeness_kwargs=cuda_vs_cpu_pixel_difference(),
        ),
    ]
)

_PAD_PARAMS = combinations_grid(
    padding=[[1], [1, 1], [1, 1, 2, 2]],
    padding_mode=["constant", "symmetric", "edge", "reflect"],
)


def sample_inputs_pad_image_tensor():
    make_pad_image_loaders = functools.partial(
        make_image_loaders, sizes=[DEFAULT_PORTRAIT_SPATIAL_SIZE], color_spaces=["RGB"], dtypes=[torch.float32]
    )

    for image_loader, padding in itertools.product(
        make_pad_image_loaders(),
        [1, (1,), (1, 2), (1, 2, 3, 4), [1], [1, 2], [1, 2, 3, 4]],
    ):
        yield ArgsKwargs(image_loader, padding=padding)

    for image_loader in make_pad_image_loaders():
        for fill in get_fills(num_channels=image_loader.num_channels, dtype=image_loader.dtype):
            yield ArgsKwargs(image_loader, padding=[1], fill=fill)

    for image_loader, padding_mode in itertools.product(
        # We branch for non-constant padding and integer inputs
        make_pad_image_loaders(dtypes=[torch.uint8]),
        ["constant", "symmetric", "edge", "reflect"],
    ):
        yield ArgsKwargs(image_loader, padding=[1], padding_mode=padding_mode)

    # `torch.nn.functional.pad` does not support symmetric padding, and thus we have a custom implementation. Besides
    # negative padding, this is already handled by the inputs above.
    for image_loader in make_pad_image_loaders():
        yield ArgsKwargs(image_loader, padding=[-1], padding_mode="symmetric")


def reference_inputs_pad_image_tensor():
    for image_loader, params in itertools.product(
        make_image_loaders(extra_dims=[()], dtypes=[torch.uint8]), _PAD_PARAMS
    ):
        for fill in get_fills(
            num_channels=image_loader.num_channels,
            dtype=image_loader.dtype,
        ):
            # FIXME: PIL kernel doesn't support sequences of length 1 if the number of channels is larger. Shouldn't it?
            if isinstance(fill, (list, tuple)):
                continue

            yield ArgsKwargs(image_loader, fill=fill, **params)


def sample_inputs_pad_bounding_boxes():
    for bounding_boxes_loader, padding in itertools.product(
        make_bounding_box_loaders(), [1, (1,), (1, 2), (1, 2, 3, 4), [1], [1, 2], [1, 2, 3, 4]]
    ):
        yield ArgsKwargs(
            bounding_boxes_loader,
            format=bounding_boxes_loader.format,
            spatial_size=bounding_boxes_loader.spatial_size,
            padding=padding,
            padding_mode="constant",
        )


def sample_inputs_pad_mask():
    for mask_loader in make_mask_loaders(sizes=[DEFAULT_PORTRAIT_SPATIAL_SIZE], num_categories=[10], num_objects=[5]):
        yield ArgsKwargs(mask_loader, padding=[1])


def reference_inputs_pad_mask():
    for mask_loader, fill, params in itertools.product(
        make_mask_loaders(num_objects=[1], extra_dims=[()]), [None, 127], _PAD_PARAMS
    ):
        yield ArgsKwargs(mask_loader, fill=fill, **params)


def sample_inputs_pad_video():
    for video_loader in make_video_loaders(sizes=[DEFAULT_PORTRAIT_SPATIAL_SIZE], num_frames=[3]):
        yield ArgsKwargs(video_loader, padding=[1])


def reference_pad_bounding_boxes(bounding_boxes, *, format, spatial_size, padding, padding_mode):

    left, right, top, bottom = _parse_pad_padding(padding)

    affine_matrix = np.array(
        [
            [1, 0, left],
            [0, 1, top],
        ],
        dtype="float64" if bounding_boxes.dtype == torch.float64 else "float32",
    )

    height = spatial_size[0] + top + bottom
    width = spatial_size[1] + left + right

    expected_bboxes = reference_affine_bounding_boxes_helper(
        bounding_boxes, format=format, spatial_size=(height, width), affine_matrix=affine_matrix
    )
    return expected_bboxes, (height, width)
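
# Assuming torchvision's (left, top, right, bottom) padding convention, padding=[1, 2, 3, 4] on a
# (32, 32) input shifts every box by (+1, +2) and yields the new canvas size
# (32 + 2 + 4, 32 + 1 + 3) == (38, 36).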


def reference_inputs_pad_bounding_boxes():
    for bounding_boxes_loader, padding in itertools.product(
        make_bounding_box_loaders(extra_dims=((), (4,))), [1, (1,), (1, 2), (1, 2, 3, 4), [1], [1, 2], [1, 2, 3, 4]]
    ):
        yield ArgsKwargs(
            bounding_boxes_loader,
            format=bounding_boxes_loader.format,
            spatial_size=bounding_boxes_loader.spatial_size,
            padding=padding,
            padding_mode="constant",
        )


def pad_xfail_jit_fill_condition(args_kwargs):
    fill = args_kwargs.kwargs.get("fill")
    if not isinstance(fill, (list, tuple)):
        return False
    elif isinstance(fill, tuple):
        return True
    else:  # isinstance(fill, list):
        return all(isinstance(f, int) for f in fill)
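
# Truth table for the condition above: a scalar fill (e.g. 2.0) -> False; any tuple fill -> True;
# a list of ints (e.g. [1, 2]) -> True; a list of floats (e.g. [1.0, 2.0]) -> False, the one
# vector case that scripted `F.pad` supports per the xfail reason used below.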


KERNEL_INFOS.extend(
    [
        KernelInfo(
            F.pad_image_tensor,
            sample_inputs_fn=sample_inputs_pad_image_tensor,
            reference_fn=pil_reference_wrapper(F.pad_image_pil),
            reference_inputs_fn=reference_inputs_pad_image_tensor,
            float32_vs_uint8=float32_vs_uint8_fill_adapter,
            closeness_kwargs=float32_vs_uint8_pixel_difference(),
            test_marks=[
                xfail_jit_python_scalar_arg("padding"),
                xfail_jit(
                    "F.pad only supports vector fills for list of floats", condition=pad_xfail_jit_fill_condition
                ),
            ],
        ),
        KernelInfo(
            F.pad_bounding_boxes,
            sample_inputs_fn=sample_inputs_pad_bounding_boxes,
            reference_fn=reference_pad_bounding_boxes,
            reference_inputs_fn=reference_inputs_pad_bounding_boxes,
            test_marks=[
                xfail_jit_python_scalar_arg("padding"),
            ],
        ),
        KernelInfo(
            F.pad_mask,
            sample_inputs_fn=sample_inputs_pad_mask,
            reference_fn=pil_reference_wrapper(F.pad_image_pil),
            reference_inputs_fn=reference_inputs_pad_mask,
            float32_vs_uint8=float32_vs_uint8_fill_adapter,
        ),
        KernelInfo(
            F.pad_video,
            sample_inputs_fn=sample_inputs_pad_video,
        ),
    ]
)

_PERSPECTIVE_COEFFS = [
    [1.2405, 0.1772, -6.9113, 0.0463, 1.251, -5.235, 0.00013, 0.0018],
    [0.7366, -0.11724, 1.45775, -0.15012, 0.73406, 2.6019, -0.0072, -0.0063],
]
_STARTPOINTS = [[0, 1], [2, 3], [4, 5], [6, 7]]
_ENDPOINTS = [[9, 8], [7, 6], [5, 4], [3, 2]]


def sample_inputs_perspective_image_tensor():
    for image_loader in make_image_loaders(sizes=[DEFAULT_PORTRAIT_SPATIAL_SIZE]):
        for fill in get_fills(num_channels=image_loader.num_channels, dtype=image_loader.dtype):
            yield ArgsKwargs(
                image_loader, startpoints=None, endpoints=None, fill=fill, coefficients=_PERSPECTIVE_COEFFS[0]
            )

    yield ArgsKwargs(make_image_loader(), startpoints=_STARTPOINTS, endpoints=_ENDPOINTS)


def reference_inputs_perspective_image_tensor():
    for image_loader, coefficients, interpolation in itertools.product(
        make_image_loaders_for_interpolation(),
        _PERSPECTIVE_COEFFS,
        [
            F.InterpolationMode.NEAREST,
            F.InterpolationMode.BILINEAR,
        ],
    ):
        for fill in get_fills(num_channels=image_loader.num_channels, dtype=image_loader.dtype):
            # FIXME: PIL kernel doesn't support sequences of length 1 if the number of channels is larger. Shouldn't it?
            if isinstance(fill, (list, tuple)):
                continue

            yield ArgsKwargs(
                image_loader,
                startpoints=None,
                endpoints=None,
                interpolation=interpolation,
                fill=fill,
                coefficients=coefficients,
            )


def sample_inputs_perspective_bounding_boxes():
    for bounding_boxes_loader in make_bounding_box_loaders():
        yield ArgsKwargs(
            bounding_boxes_loader,
            format=bounding_boxes_loader.format,
            spatial_size=bounding_boxes_loader.spatial_size,
            startpoints=None,
            endpoints=None,
            coefficients=_PERSPECTIVE_COEFFS[0],
        )

    format = datapoints.BoundingBoxFormat.XYXY
    loader = make_bounding_box_loader(format=format)
    yield ArgsKwargs(
        loader, format=format, spatial_size=loader.spatial_size, startpoints=_STARTPOINTS, endpoints=_ENDPOINTS
    )


def sample_inputs_perspective_mask():
    for mask_loader in make_mask_loaders(sizes=[DEFAULT_PORTRAIT_SPATIAL_SIZE]):
        yield ArgsKwargs(mask_loader, startpoints=None, endpoints=None, coefficients=_PERSPECTIVE_COEFFS[0])

    yield ArgsKwargs(make_detection_mask_loader(), startpoints=_STARTPOINTS, endpoints=_ENDPOINTS)


def reference_inputs_perspective_mask():
    for mask_loader, perspective_coeffs in itertools.product(
        make_mask_loaders(extra_dims=[()], num_objects=[1]), _PERSPECTIVE_COEFFS
    ):
        yield ArgsKwargs(mask_loader, startpoints=None, endpoints=None, coefficients=perspective_coeffs)


def sample_inputs_perspective_video():
    for video_loader in make_video_loaders(sizes=[DEFAULT_PORTRAIT_SPATIAL_SIZE], num_frames=[3]):
        yield ArgsKwargs(video_loader, startpoints=None, endpoints=None, coefficients=_PERSPECTIVE_COEFFS[0])

    yield ArgsKwargs(make_video_loader(), startpoints=_STARTPOINTS, endpoints=_ENDPOINTS)


KERNEL_INFOS.extend(
    [
        KernelInfo(
            F.perspective_image_tensor,
            sample_inputs_fn=sample_inputs_perspective_image_tensor,
            reference_fn=pil_reference_wrapper(F.perspective_image_pil),
            reference_inputs_fn=reference_inputs_perspective_image_tensor,
            float32_vs_uint8=float32_vs_uint8_fill_adapter,
            closeness_kwargs={
                **pil_reference_pixel_difference(2, mae=True),
                **cuda_vs_cpu_pixel_difference(),
                **float32_vs_uint8_pixel_difference(),
                **scripted_vs_eager_float64_tolerances("cpu", atol=1e-5, rtol=1e-5),
                **scripted_vs_eager_float64_tolerances("cuda", atol=1e-5, rtol=1e-5),
            },
            test_marks=[xfail_jit_python_scalar_arg("fill")],
        ),
        KernelInfo(
            F.perspective_bounding_boxes,
            sample_inputs_fn=sample_inputs_perspective_bounding_boxes,
            closeness_kwargs={
                **scripted_vs_eager_float64_tolerances("cpu", atol=1e-6, rtol=1e-6),
                **scripted_vs_eager_float64_tolerances("cuda", atol=1e-6, rtol=1e-6),
            },
        ),
        KernelInfo(
            F.perspective_mask,
            sample_inputs_fn=sample_inputs_perspective_mask,
            reference_fn=pil_reference_wrapper(F.perspective_image_pil),
            reference_inputs_fn=reference_inputs_perspective_mask,
            float32_vs_uint8=True,
            closeness_kwargs={
                (("TestKernels", "test_against_reference"), torch.uint8, "cpu"): dict(atol=10, rtol=0),
            },
        ),
        KernelInfo(
            F.perspective_video,
            sample_inputs_fn=sample_inputs_perspective_video,
            closeness_kwargs={
                **cuda_vs_cpu_pixel_difference(),
                **scripted_vs_eager_float64_tolerances("cpu", atol=1e-5, rtol=1e-5),
                **scripted_vs_eager_float64_tolerances("cuda", atol=1e-5, rtol=1e-5),
            },
        ),
    ]
)


def _get_elastic_displacement(spatial_size):
    return torch.rand(1, *spatial_size, 2)
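
# The displacement has shape (1, H, W, 2) with values drawn uniformly from [0, 1), e.g.
# `_get_elastic_displacement((4, 5)).shape == torch.Size([1, 4, 5, 2])`.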


def sample_inputs_elastic_image_tensor():
    for image_loader in make_image_loaders(sizes=[DEFAULT_PORTRAIT_SPATIAL_SIZE]):
        displacement = _get_elastic_displacement(image_loader.spatial_size)
        for fill in get_fills(num_channels=image_loader.num_channels, dtype=image_loader.dtype):
            yield ArgsKwargs(image_loader, displacement=displacement, fill=fill)


def reference_inputs_elastic_image_tensor():
    for image_loader, interpolation in itertools.product(
        make_image_loaders_for_interpolation(),
        [
            F.InterpolationMode.NEAREST,
            F.InterpolationMode.BILINEAR,
            F.InterpolationMode.BICUBIC,
        ],
    ):
        displacement = _get_elastic_displacement(image_loader.spatial_size)
        for fill in get_fills(num_channels=image_loader.num_channels, dtype=image_loader.dtype):
            yield ArgsKwargs(image_loader, interpolation=interpolation, displacement=displacement, fill=fill)


def sample_inputs_elastic_bounding_boxes():
    for bounding_boxes_loader in make_bounding_box_loaders():
        displacement = _get_elastic_displacement(bounding_boxes_loader.spatial_size)
        yield ArgsKwargs(
            bounding_boxes_loader,
            format=bounding_boxes_loader.format,
            spatial_size=bounding_boxes_loader.spatial_size,
            displacement=displacement,
        )


def sample_inputs_elastic_mask():
    for mask_loader in make_mask_loaders(sizes=[DEFAULT_PORTRAIT_SPATIAL_SIZE]):
        displacement = _get_elastic_displacement(mask_loader.shape[-2:])
        yield ArgsKwargs(mask_loader, displacement=displacement)


def sample_inputs_elastic_video():
    for video_loader in make_video_loaders(sizes=[DEFAULT_PORTRAIT_SPATIAL_SIZE], num_frames=[3]):
        displacement = _get_elastic_displacement(video_loader.shape[-2:])
        yield ArgsKwargs(video_loader, displacement=displacement)


KERNEL_INFOS.extend(
    [
        KernelInfo(
            F.elastic_image_tensor,
            sample_inputs_fn=sample_inputs_elastic_image_tensor,
            reference_inputs_fn=reference_inputs_elastic_image_tensor,
            float32_vs_uint8=float32_vs_uint8_fill_adapter,
            closeness_kwargs={
                **float32_vs_uint8_pixel_difference(6, mae=True),
                **cuda_vs_cpu_pixel_difference(),
            },
            test_marks=[xfail_jit_python_scalar_arg("fill")],
        ),
        KernelInfo(
            F.elastic_bounding_boxes,
            sample_inputs_fn=sample_inputs_elastic_bounding_boxes,
        ),
        KernelInfo(
            F.elastic_mask,
            sample_inputs_fn=sample_inputs_elastic_mask,
        ),
        KernelInfo(
            F.elastic_video,
            sample_inputs_fn=sample_inputs_elastic_video,
            closeness_kwargs=cuda_vs_cpu_pixel_difference(),
        ),
    ]
)


_CENTER_CROP_SPATIAL_SIZES = [(16, 16), (7, 33), (31, 9)]
_CENTER_CROP_OUTPUT_SIZES = [[4, 3], [42, 70], [4], 3, (5, 2), (6,)]


def sample_inputs_center_crop_image_tensor():
    for image_loader, output_size in itertools.product(
        make_image_loaders(sizes=[(16, 17)], color_spaces=["RGB"], dtypes=[torch.float32]),
        [
            # valid `output_size` types for which cropping is applied to both dimensions
            *[5, (4,), (2, 3), [6], [3, 2]],
            # `output_size`'s for which at least one dimension needs to be padded
            *[[4, 18], [17, 5], [17, 18]],
        ],
    ):
        yield ArgsKwargs(image_loader, output_size=output_size)


def reference_inputs_center_crop_image_tensor():
    for image_loader, output_size in itertools.product(
        make_image_loaders(sizes=_CENTER_CROP_SPATIAL_SIZES, extra_dims=[()], dtypes=[torch.uint8]),
        _CENTER_CROP_OUTPUT_SIZES,
    ):
        yield ArgsKwargs(image_loader, output_size=output_size)


def sample_inputs_center_crop_bounding_boxes():
    for bounding_boxes_loader, output_size in itertools.product(make_bounding_box_loaders(), _CENTER_CROP_OUTPUT_SIZES):
        yield ArgsKwargs(
            bounding_boxes_loader,
            format=bounding_boxes_loader.format,
            spatial_size=bounding_boxes_loader.spatial_size,
            output_size=output_size,
        )


def sample_inputs_center_crop_mask():
    for mask_loader in make_mask_loaders(sizes=[DEFAULT_PORTRAIT_SPATIAL_SIZE], num_categories=[10], num_objects=[5]):
        height, width = mask_loader.shape[-2:]
        yield ArgsKwargs(mask_loader, output_size=(height // 2, width // 2))


def reference_inputs_center_crop_mask():
    for mask_loader, output_size in itertools.product(
        make_mask_loaders(sizes=_CENTER_CROP_SPATIAL_SIZES, extra_dims=[()], num_objects=[1]), _CENTER_CROP_OUTPUT_SIZES
    ):
        yield ArgsKwargs(mask_loader, output_size=output_size)


def sample_inputs_center_crop_video():
    for video_loader in make_video_loaders(sizes=[DEFAULT_PORTRAIT_SPATIAL_SIZE], num_frames=[3]):
        height, width = video_loader.shape[-2:]
        yield ArgsKwargs(video_loader, output_size=(height // 2, width // 2))


KERNEL_INFOS.extend(
    [
        KernelInfo(
            F.center_crop_image_tensor,
            sample_inputs_fn=sample_inputs_center_crop_image_tensor,
            reference_fn=pil_reference_wrapper(F.center_crop_image_pil),
            reference_inputs_fn=reference_inputs_center_crop_image_tensor,
            float32_vs_uint8=True,
            test_marks=[
                xfail_jit_python_scalar_arg("output_size"),
            ],
        ),
        KernelInfo(
            F.center_crop_bounding_boxes,
            sample_inputs_fn=sample_inputs_center_crop_bounding_boxes,
            test_marks=[
                xfail_jit_python_scalar_arg("output_size"),
            ],
        ),
        KernelInfo(
            F.center_crop_mask,
            sample_inputs_fn=sample_inputs_center_crop_mask,
            reference_fn=pil_reference_wrapper(F.center_crop_image_pil),
            reference_inputs_fn=reference_inputs_center_crop_mask,
            float32_vs_uint8=True,
            test_marks=[
                xfail_jit_python_scalar_arg("output_size"),
            ],
        ),
        KernelInfo(
            F.center_crop_video,
            sample_inputs_fn=sample_inputs_center_crop_video,
        ),
    ]
)


def sample_inputs_gaussian_blur_image_tensor():
    make_gaussian_blur_image_loaders = functools.partial(make_image_loaders, sizes=[(7, 33)], color_spaces=["RGB"])

    for image_loader, kernel_size in itertools.product(make_gaussian_blur_image_loaders(), [5, (3, 3), [3, 3]]):
        yield ArgsKwargs(image_loader, kernel_size=kernel_size)

    for image_loader, sigma in itertools.product(
        make_gaussian_blur_image_loaders(), [None, (3.0, 3.0), [2.0, 2.0], 4.0, [1.5], (3.14,)]
    ):
        yield ArgsKwargs(image_loader, kernel_size=5, sigma=sigma)


def sample_inputs_gaussian_blur_video():
    for video_loader in make_video_loaders(sizes=[(7, 33)], num_frames=[5]):
        yield ArgsKwargs(video_loader, kernel_size=[3, 3])


KERNEL_INFOS.extend(
    [
        KernelInfo(
            F.gaussian_blur_image_tensor,
            sample_inputs_fn=sample_inputs_gaussian_blur_image_tensor,
            closeness_kwargs=cuda_vs_cpu_pixel_difference(),
            test_marks=[
                xfail_jit_python_scalar_arg("kernel_size"),
                xfail_jit_python_scalar_arg("sigma"),
            ],
        ),
        KernelInfo(
            F.gaussian_blur_video,
            sample_inputs_fn=sample_inputs_gaussian_blur_video,
            closeness_kwargs=cuda_vs_cpu_pixel_difference(),
        ),
    ]
)


def sample_inputs_equalize_image_tensor():
    for image_loader in make_image_loaders(sizes=[DEFAULT_PORTRAIT_SPATIAL_SIZE], color_spaces=("GRAY", "RGB")):
        yield ArgsKwargs(image_loader)


def reference_inputs_equalize_image_tensor():
    # We are not using `make_image_loaders` here since that uniformly samples the values over the whole value range.
    # Since the whole point of this kernel is to transform an arbitrary distribution of values into a uniform one,
    # the information gain is low if we already provide something really close to the expected value.
    def make_uniform_band_image(shape, dtype, device, *, low_factor, high_factor, memory_format):
        if dtype.is_floating_point:
            low = low_factor
            high = high_factor
        else:
            max_value = torch.iinfo(dtype).max
            low = int(low_factor * max_value)
            high = int(high_factor * max_value)
        return torch.testing.make_tensor(shape, dtype=dtype, device=device, low=low, high=high).to(
            memory_format=memory_format, copy=True
        )

    def make_beta_distributed_image(shape, dtype, device, *, alpha, beta, memory_format):
        image = torch.distributions.Beta(alpha, beta).sample(shape)
        if not dtype.is_floating_point:
            image.mul_(torch.iinfo(dtype).max).round_()
        return image.to(dtype=dtype, device=device, memory_format=memory_format, copy=True)

    spatial_size = (256, 256)
    for dtype, color_space, fn in itertools.product(
        [torch.uint8],
        ["GRAY", "RGB"],
        [
            lambda shape, dtype, device, memory_format: torch.zeros(shape, dtype=dtype, device=device).to(
                memory_format=memory_format, copy=True
            ),
            lambda shape, dtype, device, memory_format: torch.full(
                shape, 1.0 if dtype.is_floating_point else torch.iinfo(dtype).max, dtype=dtype, device=device
            ).to(memory_format=memory_format, copy=True),
            *[
                functools.partial(make_uniform_band_image, low_factor=low_factor, high_factor=high_factor)
                for low_factor, high_factor in [
                    (0.0, 0.25),
                    (0.25, 0.75),
                    (0.75, 1.0),
                ]
            ],
            *[
                functools.partial(make_beta_distributed_image, alpha=alpha, beta=beta)
                for alpha, beta in [
                    (0.5, 0.5),
                    (2, 2),
                    (2, 5),
                    (5, 2),
                ]
            ],
        ],
    ):
        image_loader = ImageLoader(fn, shape=(get_num_channels(color_space), *spatial_size), dtype=dtype)
        yield ArgsKwargs(image_loader)


def sample_inputs_equalize_video():
    for video_loader in make_video_loaders(sizes=[DEFAULT_PORTRAIT_SPATIAL_SIZE], num_frames=[3]):
        yield ArgsKwargs(video_loader)


KERNEL_INFOS.extend(
    [
        KernelInfo(
            F.equalize_image_tensor,
            kernel_name="equalize_image_tensor",
            sample_inputs_fn=sample_inputs_equalize_image_tensor,
            reference_fn=pil_reference_wrapper(F.equalize_image_pil),
            float32_vs_uint8=True,
            reference_inputs_fn=reference_inputs_equalize_image_tensor,
        ),
        KernelInfo(
            F.equalize_video,
            sample_inputs_fn=sample_inputs_equalize_video,
        ),
    ]
)


def sample_inputs_invert_image_tensor():
    for image_loader in make_image_loaders(sizes=[DEFAULT_PORTRAIT_SPATIAL_SIZE], color_spaces=("GRAY", "RGB")):
        yield ArgsKwargs(image_loader)


def reference_inputs_invert_image_tensor():
    for image_loader in make_image_loaders(color_spaces=("GRAY", "RGB"), extra_dims=[()], dtypes=[torch.uint8]):
        yield ArgsKwargs(image_loader)


def sample_inputs_invert_video():
    for video_loader in make_video_loaders(sizes=[DEFAULT_PORTRAIT_SPATIAL_SIZE], num_frames=[3]):
        yield ArgsKwargs(video_loader)


KERNEL_INFOS.extend(
    [
        KernelInfo(
            F.invert_image_tensor,
            kernel_name="invert_image_tensor",
            sample_inputs_fn=sample_inputs_invert_image_tensor,
            reference_fn=pil_reference_wrapper(F.invert_image_pil),
            reference_inputs_fn=reference_inputs_invert_image_tensor,
            float32_vs_uint8=True,
        ),
        KernelInfo(
            F.invert_video,
            sample_inputs_fn=sample_inputs_invert_video,
        ),
    ]
)


_POSTERIZE_BITS = [1, 4, 8]


def sample_inputs_posterize_image_tensor():
    for image_loader in make_image_loaders(sizes=[DEFAULT_PORTRAIT_SPATIAL_SIZE], color_spaces=("GRAY", "RGB")):
        yield ArgsKwargs(image_loader, bits=_POSTERIZE_BITS[0])


def reference_inputs_posterize_image_tensor():
    for image_loader, bits in itertools.product(
        make_image_loaders(color_spaces=("GRAY", "RGB"), extra_dims=[()], dtypes=[torch.uint8]),
        _POSTERIZE_BITS,
    ):
        yield ArgsKwargs(image_loader, bits=bits)


def sample_inputs_posterize_video():
    for video_loader in make_video_loaders(sizes=[DEFAULT_PORTRAIT_SPATIAL_SIZE], num_frames=[3]):
        yield ArgsKwargs(video_loader, bits=_POSTERIZE_BITS[0])


KERNEL_INFOS.extend(
    [
        KernelInfo(
            F.posterize_image_tensor,
            kernel_name="posterize_image_tensor",
            sample_inputs_fn=sample_inputs_posterize_image_tensor,
            reference_fn=pil_reference_wrapper(F.posterize_image_pil),
            reference_inputs_fn=reference_inputs_posterize_image_tensor,
            float32_vs_uint8=True,
            closeness_kwargs=float32_vs_uint8_pixel_difference(),
        ),
        KernelInfo(
            F.posterize_video,
            sample_inputs_fn=sample_inputs_posterize_video,
        ),
    ]
)


def _get_solarize_thresholds(dtype):
    for factor in [0.1, 0.5]:
        max_value = get_max_value(dtype)
        yield (float if dtype.is_floating_point else int)(max_value * factor)
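
# E.g. torch.uint8 yields the thresholds 25 and 127 (int(255 * 0.1) and int(255 * 0.5)), while a
# floating point dtype yields 0.1 and 0.5 directly.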


def sample_inputs_solarize_image_tensor():
    for image_loader in make_image_loaders(sizes=[DEFAULT_PORTRAIT_SPATIAL_SIZE], color_spaces=("GRAY", "RGB")):
        yield ArgsKwargs(image_loader, threshold=next(_get_solarize_thresholds(image_loader.dtype)))


def reference_inputs_solarize_image_tensor():
    for image_loader in make_image_loaders(color_spaces=("GRAY", "RGB"), extra_dims=[()], dtypes=[torch.uint8]):
        for threshold in _get_solarize_thresholds(image_loader.dtype):
            yield ArgsKwargs(image_loader, threshold=threshold)


def uint8_to_float32_threshold_adapter(other_args, kwargs):
    return other_args, dict(threshold=kwargs["threshold"] / 255)
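
# E.g. a uint8 threshold of 127 becomes 127 / 255 ≈ 0.498 for the float32 run, mirroring the
# value-range rescaling of the image itself.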


def sample_inputs_solarize_video():
    for video_loader in make_video_loaders(sizes=[DEFAULT_PORTRAIT_SPATIAL_SIZE], num_frames=[3]):
        yield ArgsKwargs(video_loader, threshold=next(_get_solarize_thresholds(video_loader.dtype)))


KERNEL_INFOS.extend(
    [
        KernelInfo(
            F.solarize_image_tensor,
            kernel_name="solarize_image_tensor",
            sample_inputs_fn=sample_inputs_solarize_image_tensor,
            reference_fn=pil_reference_wrapper(F.solarize_image_pil),
            reference_inputs_fn=reference_inputs_solarize_image_tensor,
            float32_vs_uint8=uint8_to_float32_threshold_adapter,
            closeness_kwargs=float32_vs_uint8_pixel_difference(),
        ),
        KernelInfo(
            F.solarize_video,
            sample_inputs_fn=sample_inputs_solarize_video,
        ),
    ]
)


def sample_inputs_autocontrast_image_tensor():
    for image_loader in make_image_loaders(sizes=[DEFAULT_PORTRAIT_SPATIAL_SIZE], color_spaces=("GRAY", "RGB")):
        yield ArgsKwargs(image_loader)


def reference_inputs_autocontrast_image_tensor():
    for image_loader in make_image_loaders(color_spaces=("GRAY", "RGB"), extra_dims=[()], dtypes=[torch.uint8]):
        yield ArgsKwargs(image_loader)


def sample_inputs_autocontrast_video():
    for video_loader in make_video_loaders(sizes=[DEFAULT_PORTRAIT_SPATIAL_SIZE], num_frames=[3]):
        yield ArgsKwargs(video_loader)


KERNEL_INFOS.extend(
    [
        KernelInfo(
            F.autocontrast_image_tensor,
            kernel_name="autocontrast_image_tensor",
            sample_inputs_fn=sample_inputs_autocontrast_image_tensor,
            reference_fn=pil_reference_wrapper(F.autocontrast_image_pil),
            reference_inputs_fn=reference_inputs_autocontrast_image_tensor,
            float32_vs_uint8=True,
            closeness_kwargs={
                **pil_reference_pixel_difference(),
                **float32_vs_uint8_pixel_difference(),
            },
        ),
        KernelInfo(
            F.autocontrast_video,
            sample_inputs_fn=sample_inputs_autocontrast_video,
        ),
    ]
)

_ADJUST_SHARPNESS_FACTORS = [0.1, 0.5]


def sample_inputs_adjust_sharpness_image_tensor():
    for image_loader in make_image_loaders(
        sizes=[DEFAULT_PORTRAIT_SPATIAL_SIZE, (2, 2)],
        color_spaces=("GRAY", "RGB"),
    ):
        yield ArgsKwargs(image_loader, sharpness_factor=_ADJUST_SHARPNESS_FACTORS[0])


def reference_inputs_adjust_sharpness_image_tensor():
    for image_loader, sharpness_factor in itertools.product(
        make_image_loaders(color_spaces=("GRAY", "RGB"), extra_dims=[()], dtypes=[torch.uint8]),
        _ADJUST_SHARPNESS_FACTORS,
    ):
        yield ArgsKwargs(image_loader, sharpness_factor=sharpness_factor)


def sample_inputs_adjust_sharpness_video():
    for video_loader in make_video_loaders(sizes=[DEFAULT_PORTRAIT_SPATIAL_SIZE], num_frames=[3]):
        yield ArgsKwargs(video_loader, sharpness_factor=_ADJUST_SHARPNESS_FACTORS[0])


KERNEL_INFOS.extend(
    [
        KernelInfo(
            F.adjust_sharpness_image_tensor,
            kernel_name="adjust_sharpness_image_tensor",
            sample_inputs_fn=sample_inputs_adjust_sharpness_image_tensor,
            reference_fn=pil_reference_wrapper(F.adjust_sharpness_image_pil),
            reference_inputs_fn=reference_inputs_adjust_sharpness_image_tensor,
            float32_vs_uint8=True,
            closeness_kwargs=float32_vs_uint8_pixel_difference(2),
        ),
        KernelInfo(
            F.adjust_sharpness_video,
            sample_inputs_fn=sample_inputs_adjust_sharpness_video,
        ),
    ]
)


def sample_inputs_erase_image_tensor():
    for image_loader in make_image_loaders(sizes=[DEFAULT_PORTRAIT_SPATIAL_SIZE]):
        # FIXME: make the parameters more diverse
        h, w = 6, 7
        v = torch.rand(image_loader.num_channels, h, w)
        yield ArgsKwargs(image_loader, i=1, j=2, h=h, w=w, v=v)


def sample_inputs_erase_video():
    for video_loader in make_video_loaders(sizes=[DEFAULT_PORTRAIT_SPATIAL_SIZE], num_frames=[3]):
        # FIXME: make the parameters more diverse
        h, w = 6, 7
        v = torch.rand(video_loader.num_channels, h, w)
        yield ArgsKwargs(video_loader, i=1, j=2, h=h, w=w, v=v)


KERNEL_INFOS.extend(
    [
        KernelInfo(
            F.erase_image_tensor,
            kernel_name="erase_image_tensor",
            sample_inputs_fn=sample_inputs_erase_image_tensor,
        ),
        KernelInfo(
            F.erase_video,
            sample_inputs_fn=sample_inputs_erase_video,
        ),
    ]
)

_ADJUST_BRIGHTNESS_FACTORS = [0.1, 0.5]


def sample_inputs_adjust_brightness_image_tensor():
    for image_loader in make_image_loaders(sizes=[DEFAULT_PORTRAIT_SPATIAL_SIZE], color_spaces=("GRAY", "RGB")):
        yield ArgsKwargs(image_loader, brightness_factor=_ADJUST_BRIGHTNESS_FACTORS[0])


def reference_inputs_adjust_brightness_image_tensor():
    for image_loader, brightness_factor in itertools.product(
        make_image_loaders(color_spaces=("GRAY", "RGB"), extra_dims=[()], dtypes=[torch.uint8]),
        _ADJUST_BRIGHTNESS_FACTORS,
    ):
        yield ArgsKwargs(image_loader, brightness_factor=brightness_factor)


def sample_inputs_adjust_brightness_video():
    for video_loader in make_video_loaders(sizes=[DEFAULT_PORTRAIT_SPATIAL_SIZE], num_frames=[3]):
        yield ArgsKwargs(video_loader, brightness_factor=_ADJUST_BRIGHTNESS_FACTORS[0])


KERNEL_INFOS.extend(
    [
        KernelInfo(
            F.adjust_brightness_image_tensor,
            kernel_name="adjust_brightness_image_tensor",
            sample_inputs_fn=sample_inputs_adjust_brightness_image_tensor,
            reference_fn=pil_reference_wrapper(F.adjust_brightness_image_pil),
            reference_inputs_fn=reference_inputs_adjust_brightness_image_tensor,
            float32_vs_uint8=True,
            closeness_kwargs=float32_vs_uint8_pixel_difference(),
        ),
        KernelInfo(
            F.adjust_brightness_video,
            sample_inputs_fn=sample_inputs_adjust_brightness_video,
        ),
    ]
)


_ADJUST_CONTRAST_FACTORS = [0.1, 0.5]


def sample_inputs_adjust_contrast_image_tensor():
    for image_loader in make_image_loaders(sizes=[DEFAULT_PORTRAIT_SPATIAL_SIZE], color_spaces=("GRAY", "RGB")):
        yield ArgsKwargs(image_loader, contrast_factor=_ADJUST_CONTRAST_FACTORS[0])


def reference_inputs_adjust_contrast_image_tensor():
    for image_loader, contrast_factor in itertools.product(
        make_image_loaders(color_spaces=("GRAY", "RGB"), extra_dims=[()], dtypes=[torch.uint8]),
        _ADJUST_CONTRAST_FACTORS,
    ):
        yield ArgsKwargs(image_loader, contrast_factor=contrast_factor)


def sample_inputs_adjust_contrast_video():
    for video_loader in make_video_loaders(sizes=[DEFAULT_PORTRAIT_SPATIAL_SIZE], num_frames=[3]):
        yield ArgsKwargs(video_loader, contrast_factor=_ADJUST_CONTRAST_FACTORS[0])


KERNEL_INFOS.extend(
    [
        KernelInfo(
            F.adjust_contrast_image_tensor,
            kernel_name="adjust_contrast_image_tensor",
            sample_inputs_fn=sample_inputs_adjust_contrast_image_tensor,
            reference_fn=pil_reference_wrapper(F.adjust_contrast_image_pil),
            reference_inputs_fn=reference_inputs_adjust_contrast_image_tensor,
            float32_vs_uint8=True,
            closeness_kwargs={
                **pil_reference_pixel_difference(),
                **float32_vs_uint8_pixel_difference(2),
                **cuda_vs_cpu_pixel_difference(),
                (("TestKernels", "test_against_reference"), torch.uint8, "cpu"): pixel_difference_closeness_kwargs(1),
            },
        ),
        KernelInfo(
            F.adjust_contrast_video,
            sample_inputs_fn=sample_inputs_adjust_contrast_video,
            closeness_kwargs={
                **cuda_vs_cpu_pixel_difference(),
                (("TestKernels", "test_against_reference"), torch.uint8, "cpu"): pixel_difference_closeness_kwargs(1),
            },
        ),
    ]
)

_ADJUST_GAMMA_GAMMAS_GAINS = [
    (0.5, 2.0),
    (0.0, 1.0),
]


def sample_inputs_adjust_gamma_image_tensor():
    gamma, gain = _ADJUST_GAMMA_GAMMAS_GAINS[0]
    for image_loader in make_image_loaders(sizes=[DEFAULT_PORTRAIT_SPATIAL_SIZE], color_spaces=("GRAY", "RGB")):
        yield ArgsKwargs(image_loader, gamma=gamma, gain=gain)


def reference_inputs_adjust_gamma_image_tensor():
    for image_loader, (gamma, gain) in itertools.product(
        make_image_loaders(color_spaces=("GRAY", "RGB"), extra_dims=[()], dtypes=[torch.uint8]),
        _ADJUST_GAMMA_GAMMAS_GAINS,
    ):
        yield ArgsKwargs(image_loader, gamma=gamma, gain=gain)


def sample_inputs_adjust_gamma_video():
    gamma, gain = _ADJUST_GAMMA_GAMMAS_GAINS[0]
1372
    for video_loader in make_video_loaders(sizes=[DEFAULT_PORTRAIT_SPATIAL_SIZE], num_frames=[3]):
1373
1374
1375
1376
1377
1378
1379
1380
1381
1382
1383
        yield ArgsKwargs(video_loader, gamma=gamma, gain=gain)


KERNEL_INFOS.extend(
    [
        KernelInfo(
            F.adjust_gamma_image_tensor,
            kernel_name="adjust_gamma_image_tensor",
            sample_inputs_fn=sample_inputs_adjust_gamma_image_tensor,
            reference_fn=pil_reference_wrapper(F.adjust_gamma_image_pil),
            reference_inputs_fn=reference_inputs_adjust_gamma_image_tensor,
            float32_vs_uint8=True,
            closeness_kwargs={
                **pil_reference_pixel_difference(),
                **float32_vs_uint8_pixel_difference(),
            },
        ),
        KernelInfo(
            F.adjust_gamma_video,
            sample_inputs_fn=sample_inputs_adjust_gamma_video,
        ),
    ]
)


_ADJUST_HUE_FACTORS = [-0.1, 0.5]
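# torchvision's adjust_hue requires hue_factor to lie in [-0.5, 0.5]; these cover
# a small negative shift and the maximum positive shift.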


def sample_inputs_adjust_hue_image_tensor():
    for image_loader in make_image_loaders(sizes=[DEFAULT_PORTRAIT_SPATIAL_SIZE], color_spaces=("GRAY", "RGB")):
        yield ArgsKwargs(image_loader, hue_factor=_ADJUST_HUE_FACTORS[0])


def reference_inputs_adjust_hue_image_tensor():
    for image_loader, hue_factor in itertools.product(
        make_image_loaders(color_spaces=("GRAY", "RGB"), extra_dims=[()], dtypes=[torch.uint8]),
        _ADJUST_HUE_FACTORS,
    ):
        yield ArgsKwargs(image_loader, hue_factor=hue_factor)


def sample_inputs_adjust_hue_video():
    for video_loader in make_video_loaders(sizes=[DEFAULT_PORTRAIT_SPATIAL_SIZE], num_frames=[3]):
        yield ArgsKwargs(video_loader, hue_factor=_ADJUST_HUE_FACTORS[0])


KERNEL_INFOS.extend(
    [
        KernelInfo(
            F.adjust_hue_image_tensor,
            kernel_name="adjust_hue_image_tensor",
            sample_inputs_fn=sample_inputs_adjust_hue_image_tensor,
            reference_fn=pil_reference_wrapper(F.adjust_hue_image_pil),
            reference_inputs_fn=reference_inputs_adjust_hue_image_tensor,
            float32_vs_uint8=True,
            closeness_kwargs={
                **pil_reference_pixel_difference(2, mae=True),
                **float32_vs_uint8_pixel_difference(),
            },
        ),
        KernelInfo(
            F.adjust_hue_video,
            sample_inputs_fn=sample_inputs_adjust_hue_video,
        ),
    ]
)

_ADJUST_SATURATION_FACTORS = [0.1, 0.5]
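# For adjust_saturation, 0 would produce a grayscale image and 1 the original.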


def sample_inputs_adjust_saturation_image_tensor():
    for image_loader in make_image_loaders(sizes=[DEFAULT_PORTRAIT_SPATIAL_SIZE], color_spaces=("GRAY", "RGB")):
        yield ArgsKwargs(image_loader, saturation_factor=_ADJUST_SATURATION_FACTORS[0])


def reference_inputs_adjust_saturation_image_tensor():
    for image_loader, saturation_factor in itertools.product(
        make_image_loaders(color_spaces=("GRAY", "RGB"), extra_dims=[()], dtypes=[torch.uint8]),
        _ADJUST_SATURATION_FACTORS,
    ):
        yield ArgsKwargs(image_loader, saturation_factor=saturation_factor)


def sample_inputs_adjust_saturation_video():
    for video_loader in make_video_loaders(sizes=[DEFAULT_PORTRAIT_SPATIAL_SIZE], num_frames=[3]):
        yield ArgsKwargs(video_loader, saturation_factor=_ADJUST_SATURATION_FACTORS[0])


KERNEL_INFOS.extend(
    [
        KernelInfo(
            F.adjust_saturation_image_tensor,
            kernel_name="adjust_saturation_image_tensor",
            sample_inputs_fn=sample_inputs_adjust_saturation_image_tensor,
            reference_fn=pil_reference_wrapper(F.adjust_saturation_image_pil),
            reference_inputs_fn=reference_inputs_adjust_saturation_image_tensor,
            float32_vs_uint8=True,
            closeness_kwargs={
                **pil_reference_pixel_difference(),
                **float32_vs_uint8_pixel_difference(2),
                **cuda_vs_cpu_pixel_difference(),
            },
        ),
        KernelInfo(
            F.adjust_saturation_video,
            sample_inputs_fn=sample_inputs_adjust_saturation_video,
            closeness_kwargs=cuda_vs_cpu_pixel_difference(),
        ),
    ]
)


def sample_inputs_clamp_bounding_boxes():
    for bounding_boxes_loader in make_bounding_box_loaders():
        yield ArgsKwargs(
            bounding_boxes_loader,
            format=bounding_boxes_loader.format,
            spatial_size=bounding_boxes_loader.spatial_size,
        )


KERNEL_INFOS.append(
    KernelInfo(
        F.clamp_bounding_boxes,
        sample_inputs_fn=sample_inputs_clamp_bounding_boxes,
        logs_usage=True,
    )
)

_FIVE_TEN_CROP_SIZES = [7, (6,), [5], (6, 5), [7, 6]]
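# The list above deliberately mixes an int, one-element sequences, and two-element
# sequences to exercise every `size` format the five/ten crop kernels accept.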


def _get_five_ten_crop_spatial_size(size):
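    # Double the crop size so the generated images are strictly larger than the
    # requested crop, e.g. size=(6, 5) -> spatial size (12, 10).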
    if isinstance(size, int):
        crop_height = crop_width = size
    elif len(size) == 1:
        crop_height = crop_width = size[0]
    else:
        crop_height, crop_width = size
    return 2 * crop_height, 2 * crop_width


def sample_inputs_five_crop_image_tensor():
    for size in _FIVE_TEN_CROP_SIZES:
        for image_loader in make_image_loaders(
            sizes=[_get_five_ten_crop_spatial_size(size)],
            color_spaces=["RGB"],
            dtypes=[torch.float32],
        ):
            yield ArgsKwargs(image_loader, size=size)


def reference_inputs_five_crop_image_tensor():
    for size in _FIVE_TEN_CROP_SIZES:
        for image_loader in make_image_loaders(
            sizes=[_get_five_ten_crop_spatial_size(size)], extra_dims=[()], dtypes=[torch.uint8]
        ):
            yield ArgsKwargs(image_loader, size=size)


def sample_inputs_five_crop_video():
    size = _FIVE_TEN_CROP_SIZES[0]
    for video_loader in make_video_loaders(sizes=[_get_five_ten_crop_spatial_size(size)]):
        yield ArgsKwargs(video_loader, size=size)


def sample_inputs_ten_crop_image_tensor():
    for size, vertical_flip in itertools.product(_FIVE_TEN_CROP_SIZES, [False, True]):
        for image_loader in make_image_loaders(
            sizes=[_get_five_ten_crop_spatial_size(size)],
            color_spaces=["RGB"],
            dtypes=[torch.float32],
        ):
            yield ArgsKwargs(image_loader, size=size, vertical_flip=vertical_flip)


def reference_inputs_ten_crop_image_tensor():
    for size, vertical_flip in itertools.product(_FIVE_TEN_CROP_SIZES, [False, True]):
        for image_loader in make_image_loaders(
            sizes=[_get_five_ten_crop_spatial_size(size)], extra_dims=[()], dtypes=[torch.uint8]
        ):
            yield ArgsKwargs(image_loader, size=size, vertical_flip=vertical_flip)


def sample_inputs_ten_crop_video():
    size = _FIVE_TEN_CROP_SIZES[0]
    for video_loader in make_video_loaders(sizes=[_get_five_ten_crop_spatial_size(size)]):
        yield ArgsKwargs(video_loader, size=size)


def multi_crop_pil_reference_wrapper(pil_kernel):
    def wrapper(input_tensor, *other_args, **kwargs):
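        # five_crop / ten_crop return a tuple of crops rather than a single image,
        # so each PIL output is converted back to a tensor matching the input's dtype.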
        output = pil_reference_wrapper(pil_kernel)(input_tensor, *other_args, **kwargs)
        return type(output)(
            F.to_dtype_image_tensor(F.to_image_tensor(output_pil), dtype=input_tensor.dtype, scale=True)
            for output_pil in output
        )

    return wrapper


_common_five_ten_crop_marks = [
    xfail_jit_python_scalar_arg("size"),
    mark_framework_limitation(("TestKernels", "test_batched_vs_single"), "Custom batching needed."),
]
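# test_batched_vs_single is excluded because the five/ten crop kernels return a
# tuple of crops, which the generic batching comparison cannot unpack.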

KERNEL_INFOS.extend(
    [
        KernelInfo(
            F.five_crop_image_tensor,
            sample_inputs_fn=sample_inputs_five_crop_image_tensor,
            reference_fn=multi_crop_pil_reference_wrapper(F.five_crop_image_pil),
            reference_inputs_fn=reference_inputs_five_crop_image_tensor,
            test_marks=_common_five_ten_crop_marks,
        ),
        KernelInfo(
            F.five_crop_video,
            sample_inputs_fn=sample_inputs_five_crop_video,
            test_marks=_common_five_ten_crop_marks,
        ),
        KernelInfo(
            F.ten_crop_image_tensor,
            sample_inputs_fn=sample_inputs_ten_crop_image_tensor,
            reference_fn=multi_crop_pil_reference_wrapper(F.ten_crop_image_pil),
            reference_inputs_fn=reference_inputs_ten_crop_image_tensor,
            test_marks=_common_five_ten_crop_marks,
        ),
        KernelInfo(
            F.ten_crop_video,
            sample_inputs_fn=sample_inputs_ten_crop_video,
            test_marks=_common_five_ten_crop_marks,
        ),
    ]
)

_NORMALIZE_MEANS_STDS = [
    ((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)),
    ([0.0, 0.0, 0.0], [1.0, 1.0, 1.0]),
    (0.5, 2.0),
]
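# The first pair is the standard ImageNet statistics, the second is an identity
# normalization, and the scalar pair checks that Python scalars broadcast over channels.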


def sample_inputs_normalize_image_tensor():
    for image_loader, (mean, std) in itertools.product(
        make_image_loaders(sizes=[DEFAULT_PORTRAIT_SPATIAL_SIZE], color_spaces=["RGB"], dtypes=[torch.float32]),
        _NORMALIZE_MEANS_STDS,
    ):
        yield ArgsKwargs(image_loader, mean=mean, std=std)


def reference_normalize_image_tensor(image, mean, std, inplace=False):
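    # Plain channel-wise (image - mean) / std; mean and std are reshaped to
    # (C, 1, 1) so they broadcast over the spatial dimensions.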
    mean = torch.tensor(mean).view(-1, 1, 1)
    std = torch.tensor(std).view(-1, 1, 1)

    sub = torch.Tensor.sub_ if inplace else torch.Tensor.sub
    return sub(image, mean).div_(std)


def reference_inputs_normalize_image_tensor():
    yield ArgsKwargs(
        make_image_loader(size=(32, 32), color_space="RGB", extra_dims=[1]),
        mean=[0.5, 0.5, 0.5],
        std=[1.0, 1.0, 1.0],
    )


def sample_inputs_normalize_video():
    mean, std = _NORMALIZE_MEANS_STDS[0]
    for video_loader in make_video_loaders(
        sizes=[DEFAULT_PORTRAIT_SPATIAL_SIZE], color_spaces=["RGB"], num_frames=[3], dtypes=[torch.float32]
    ):
        yield ArgsKwargs(video_loader, mean=mean, std=std)


KERNEL_INFOS.extend(
    [
        KernelInfo(
            F.normalize_image_tensor,
            kernel_name="normalize_image_tensor",
            sample_inputs_fn=sample_inputs_normalize_image_tensor,
            reference_fn=reference_normalize_image_tensor,
            reference_inputs_fn=reference_inputs_normalize_image_tensor,
            test_marks=[
                xfail_jit_python_scalar_arg("mean"),
                xfail_jit_python_scalar_arg("std"),
            ],
        ),
        KernelInfo(
            F.normalize_video,
            sample_inputs_fn=sample_inputs_normalize_video,
        ),
    ]
)


def sample_inputs_uniform_temporal_subsample_video():
    for video_loader in make_video_loaders(sizes=[DEFAULT_PORTRAIT_SPATIAL_SIZE], num_frames=[4]):
        yield ArgsKwargs(video_loader, num_samples=2)


def reference_uniform_temporal_subsample_video(x, num_samples):
    # Copy-pasted from
    # https://github.com/facebookresearch/pytorchvideo/blob/c8d23d8b7e597586a9e2d18f6ed31ad8aa379a7a/pytorchvideo/transforms/functional.py#L19
    t = x.shape[-4]
    assert num_samples > 0 and t > 0
    # Sample by nearest neighbor interpolation if num_samples > t.
    indices = torch.linspace(0, t - 1, num_samples)
    indices = torch.clamp(indices, 0, t - 1).long()
    return torch.index_select(x, -4, indices)


def reference_inputs_uniform_temporal_subsample_video():
    for video_loader in make_video_loaders(
        sizes=[DEFAULT_PORTRAIT_SPATIAL_SIZE], color_spaces=["RGB"], num_frames=[10]
    ):
        for num_samples in range(1, video_loader.shape[-4] + 1):
            yield ArgsKwargs(video_loader, num_samples)


KERNEL_INFOS.append(
    KernelInfo(
        F.uniform_temporal_subsample_video,
        sample_inputs_fn=sample_inputs_uniform_temporal_subsample_video,
        reference_fn=reference_uniform_temporal_subsample_video,
        reference_inputs_fn=reference_inputs_uniform_temporal_subsample_video,
    )
)