[CHERRYPICK] PIL fill len 1 seq / float fill for int images (#7951)

a90e5846 · Philip Meier · GitHub · eab7cfbf · a90e5846 · a90e5846
Unverified Commit a90e5846 authored Sep 08, 2023 by Philip Meier Committed by GitHub Sep 08, 2023
4 changed files
--- a/test/test_transforms_v2_refactored.py
+++ b/test/test_transforms_v2_refactored.py
@@ -309,11 +309,12 @@ def adapt_fill(value, *, dtype):
        return value

    max_value = get_max_value(dtype)
+    value_type = float if dtype.is_floating_point else int

    if isinstance(value, (int, float)):
-        return type(value)(value * max_value)
+        return value_type(value * max_value)
    elif isinstance(value, (list, tuple)):
-        return type(value)(type(v)(v * max_value) for v in value)
+        return type(value)(value_type(v * max_value) for v in value)
    else:
        raise ValueError(f"fill should be an int or float, or a list or tuple of the former, but got '{value}'.")

@@ -414,6 +415,10 @@ def reference_affine_bounding_boxes_helper(bounding_boxes, *, affine_matrix, new
    )


+# turns all warnings into errors for this module
+pytestmark = pytest.mark.filterwarnings("error")
+
+
 class TestResize:
    INPUT_SIZE = (17, 11)
    OUTPUT_SIZES = [17, [17], (17,), [12, 13], (12, 13)]
@@ -2575,18 +2580,19 @@ class TestCrop:
    def test_transform(self, param, value, make_input):
        input = make_input(self.INPUT_SIZE)

-        kwargs = {param: value}
        if param == "fill":
-            # 1. size is required
-            # 2. the fill parameter only has an affect if we need padding
-            kwargs["size"] = [s + 4 for s in self.INPUT_SIZE]
-
-            if isinstance(input, PIL.Image.Image) and isinstance(value, (tuple, list)) and len(value) == 1:
-                pytest.xfail("F._pad_image_pil does not support sequences of length 1 for fill.")
-
            if isinstance(input, tv_tensors.Mask) and isinstance(value, (tuple, list)):
                pytest.skip("F.pad_mask doesn't support non-scalar fill.")

+            kwargs = dict(
+                # 1. size is required
+                # 2. the fill parameter only has an effect if we need padding
+                size=[s + 4 for s in self.INPUT_SIZE],
+                fill=adapt_fill(value, dtype=input.dtype if isinstance(input, torch.Tensor) else torch.uint8),
+            )
+        else:
+            kwargs = {param: value}
+
        check_transform(
            transforms.RandomCrop(**kwargs, pad_if_needed=True),
            input,

--- a/test/transforms_v2_dispatcher_infos.py
+++ b/test/transforms_v2_dispatcher_infos.py
-import collections.abc
-
 import pytest
 import torchvision.transforms.v2.functional as F
 from torchvision import tv_tensors
@@ -112,32 +110,6 @@ multi_crop_skips = [
 multi_crop_skips.append(skip_dispatch_tv_tensor)


-def xfails_pil(reason, *, condition=None):
-    return [
-        TestMark(("TestDispatchers", test_name), pytest.mark.xfail(reason=reason), condition=condition)
-        for test_name in ["test_dispatch_pil", "test_pil_output_type"]
-    ]
-
-
-def fill_sequence_needs_broadcast(args_kwargs):
-    (image_loader, *_), kwargs = args_kwargs
-    try:
-        fill = kwargs["fill"]
-    except KeyError:
-        return False
-
-    if not isinstance(fill, collections.abc.Sequence) or len(fill) > 1:
-        return False
-
-    return image_loader.num_channels > 1
-
-
-xfails_pil_if_fill_sequence_needs_broadcast = xfails_pil(
-    "PIL kernel doesn't support sequences of length 1 for `fill` if the number of color channels is larger.",
-    condition=fill_sequence_needs_broadcast,
-)
-
-
 DISPATCHER_INFOS = [
    DispatcherInfo(
        F.resized_crop,
@@ -159,14 +131,6 @@ DISPATCHER_INFOS = [
        },
        pil_kernel_info=PILKernelInfo(F._pad_image_pil, kernel_name="pad_image_pil"),
        test_marks=[
-            *xfails_pil(
-                reason=(
-                    "PIL kernel doesn't support sequences of length 1 for argument `fill` and "
-                    "`padding_mode='constant'`, if the number of color channels is larger."
-                ),
-                condition=lambda args_kwargs: fill_sequence_needs_broadcast(args_kwargs)
-                and args_kwargs.kwargs.get("padding_mode", "constant") == "constant",
-            ),
            xfail_jit("F.pad only supports vector fills for list of floats", condition=pad_xfail_jit_fill_condition),
            xfail_jit_python_scalar_arg("padding"),
        ],
@@ -181,7 +145,6 @@ DISPATCHER_INFOS = [
        },
        pil_kernel_info=PILKernelInfo(F._perspective_image_pil),
        test_marks=[
-            *xfails_pil_if_fill_sequence_needs_broadcast,
            xfail_jit_python_scalar_arg("fill"),
        ],
    ),

--- a/torchvision/transforms/_functional_pil.py
+++ b/torchvision/transforms/_functional_pil.py
@@ -264,11 +264,13 @@ def _parse_fill(
    if isinstance(fill, (int, float)) and num_channels > 1:
        fill = tuple([fill] * num_channels)
    if isinstance(fill, (list, tuple)):
-        if len(fill) != num_channels:
+        if len(fill) == 1:
+            fill = fill * num_channels
+        elif len(fill) != num_channels:
            msg = "The number of elements in 'fill' does not match the number of channels of the image ({} != {})"
            raise ValueError(msg.format(len(fill), num_channels))

-        fill = tuple(fill)
+        fill = tuple(fill)  # type: ignore[arg-type]

    if img.mode != "F":
        if isinstance(fill, (list, tuple)):

--- a/torchvision/transforms/v2/functional/_geometry.py
+++ b/torchvision/transforms/v2/functional/_geometry.py
@@ -1235,7 +1235,11 @@ def _pad_with_vector_fill(

    output = _pad_with_scalar_fill(image, torch_padding, fill=0, padding_mode="constant")
    left, right, top, bottom = torch_padding
-    fill = torch.tensor(fill, dtype=image.dtype, device=image.device).reshape(-1, 1, 1)
+
+    # We create the tensor in the autodetected dtype first and convert it to the image dtype afterwards to avoid an
+    # implicit float -> int conversion. That would happen, for example, for the valid input of a uint8 image with a
+    # floating point fill value.
+    fill = torch.tensor(fill, device=image.device).to(dtype=image.dtype).reshape(-1, 1, 1)

    if top > 0:
        output[..., :top, :] = fill