"git@developer.sourcefind.cn:OpenDAS/torchaudio.git" did not exist on "93cc6da7771baf4c7beae0b6373efbe9dc16485d"
Unverified commit dc11b1f6, authored by Vasilis Vryniotis, committed by GitHub

[prototype] Restore BC on perspective (#6902)

* Restore BC on perspective
* Fix linter
* Fix tests
* Apply code-review changes
* Appease mypy
* Revert named parameters
parent 73206486
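
For context, this change restores the v1 `startpoints`/`endpoints` call convention on the v2 perspective kernels while keeping the coefficient-based one. A minimal sketch of the two conventions now accepted (assuming this branch's prototype namespace; the sample values are illustrative):

```python
import torch
from torchvision.prototype.transforms import functional as F

img = torch.randint(0, 256, (3, 32, 32), dtype=torch.uint8)

# v1-style (BC): derive the 8 coefficients from four point correspondences.
startpoints = [[0, 0], [31, 0], [31, 31], [0, 31]]
endpoints = [[2, 1], [30, 3], [29, 30], [1, 28]]
out = F.perspective(img, startpoints, endpoints)

# v2-style: pass precomputed coefficients and leave the points as None.
identity = [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0]  # x' = x, y' = y
out = F.perspective(img, None, None, coefficients=identity)
```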
@@ -1184,38 +1184,38 @@ _PERSPECTIVE_COEFFS = [
 def sample_inputs_perspective_image_tensor():
     for image_loader in make_image_loaders(sizes=["random"]):
         for fill in [None, 128.0, 128, [12.0], [12.0 + c for c in range(image_loader.num_channels)]]:
-            yield ArgsKwargs(image_loader, fill=fill, perspective_coeffs=_PERSPECTIVE_COEFFS[0])
+            yield ArgsKwargs(image_loader, None, None, fill=fill, coefficients=_PERSPECTIVE_COEFFS[0])


 def reference_inputs_perspective_image_tensor():
-    for image_loader, perspective_coeffs in itertools.product(make_image_loaders(extra_dims=[()]), _PERSPECTIVE_COEFFS):
+    for image_loader, coefficients in itertools.product(make_image_loaders(extra_dims=[()]), _PERSPECTIVE_COEFFS):
         # FIXME: PIL kernel doesn't support sequences of length 1 if the number of channels is larger. Shouldn't it?
         for fill in [None, 128.0, 128, [12.0 + c for c in range(image_loader.num_channels)]]:
-            yield ArgsKwargs(image_loader, fill=fill, perspective_coeffs=perspective_coeffs)
+            yield ArgsKwargs(image_loader, None, None, fill=fill, coefficients=coefficients)


 def sample_inputs_perspective_bounding_box():
     for bounding_box_loader in make_bounding_box_loaders():
         yield ArgsKwargs(
-            bounding_box_loader, format=bounding_box_loader.format, perspective_coeffs=_PERSPECTIVE_COEFFS[0]
+            bounding_box_loader, bounding_box_loader.format, None, None, coefficients=_PERSPECTIVE_COEFFS[0]
         )


 def sample_inputs_perspective_mask():
     for mask_loader in make_mask_loaders(sizes=["random"]):
-        yield ArgsKwargs(mask_loader, perspective_coeffs=_PERSPECTIVE_COEFFS[0])
+        yield ArgsKwargs(mask_loader, None, None, coefficients=_PERSPECTIVE_COEFFS[0])


 def reference_inputs_perspective_mask():
     for mask_loader, perspective_coeffs in itertools.product(
         make_mask_loaders(extra_dims=[()], num_objects=[1]), _PERSPECTIVE_COEFFS
     ):
-        yield ArgsKwargs(mask_loader, perspective_coeffs=perspective_coeffs)
+        yield ArgsKwargs(mask_loader, None, None, coefficients=perspective_coeffs)


 def sample_inputs_perspective_video():
     for video_loader in make_video_loaders(sizes=["random"], num_frames=["random"]):
-        yield ArgsKwargs(video_loader, perspective_coeffs=_PERSPECTIVE_COEFFS[0])
+        yield ArgsKwargs(video_loader, None, None, coefficients=_PERSPECTIVE_COEFFS[0])

 KERNEL_INFOS.extend(
...
@@ -917,8 +917,8 @@ class TestRandomPerspective:
         params = transform._get_params([image])

         h, w = image.spatial_size
-        assert "perspective_coeffs" in params
-        assert len(params["perspective_coeffs"]) == 8
+        assert "coefficients" in params
+        assert len(params["coefficients"]) == 8

     @pytest.mark.parametrize("distortion_scale", [0.1, 0.7])
     def test__transform(self, distortion_scale, mocker):
@@ -940,7 +940,7 @@ class TestRandomPerspective:
         params = transform._get_params([inpt])

         fill = transforms._utils._convert_fill_arg(fill)
-        fn.assert_called_once_with(inpt, **params, fill=fill, interpolation=interpolation)
+        fn.assert_called_once_with(inpt, None, None, **params, fill=fill, interpolation=interpolation)


 class TestElasticTransform:
...
@@ -874,7 +874,9 @@ def test_correctness_perspective_bounding_box(device, startpoints, endpoints):
     output_bboxes = F.perspective_bounding_box(
         bboxes,
         bboxes_format,
-        perspective_coeffs=pcoeffs,
+        None,
+        None,
+        coefficients=pcoeffs,
     )

     if bboxes.ndim < 2:
...
@@ -169,11 +169,15 @@ class BoundingBox(_Feature):

     def perspective(
         self,
-        perspective_coeffs: List[float],
+        startpoints: Optional[List[List[int]]],
+        endpoints: Optional[List[List[int]]],
         interpolation: InterpolationMode = InterpolationMode.BILINEAR,
         fill: FillTypeJIT = None,
+        coefficients: Optional[List[float]] = None,
     ) -> BoundingBox:
-        output = self._F.perspective_bounding_box(self.as_subclass(torch.Tensor), self.format, perspective_coeffs)
+        output = self._F.perspective_bounding_box(
+            self.as_subclass(torch.Tensor), self.format, startpoints, endpoints, coefficients=coefficients
+        )
         return BoundingBox.wrap_like(self, output)

     def elastic(
...
@@ -218,9 +218,11 @@ class _Feature(torch.Tensor):

     def perspective(
         self,
-        perspective_coeffs: List[float],
+        startpoints: Optional[List[List[int]]],
+        endpoints: Optional[List[List[int]]],
         interpolation: InterpolationMode = InterpolationMode.BILINEAR,
         fill: FillTypeJIT = None,
+        coefficients: Optional[List[float]] = None,
     ) -> _Feature:
         return self
...
@@ -206,12 +206,19 @@ class Image(_Feature):

     def perspective(
         self,
-        perspective_coeffs: List[float],
+        startpoints: Optional[List[List[int]]],
+        endpoints: Optional[List[List[int]]],
         interpolation: InterpolationMode = InterpolationMode.BILINEAR,
         fill: FillTypeJIT = None,
+        coefficients: Optional[List[float]] = None,
     ) -> Image:
         output = self._F.perspective_image_tensor(
-            self.as_subclass(torch.Tensor), perspective_coeffs, interpolation=interpolation, fill=fill
+            self.as_subclass(torch.Tensor),
+            startpoints,
+            endpoints,
+            interpolation=interpolation,
+            fill=fill,
+            coefficients=coefficients,
         )
         return Image.wrap_like(self, output)
...
@@ -118,11 +118,15 @@ class Mask(_Feature):

     def perspective(
         self,
-        perspective_coeffs: List[float],
+        startpoints: Optional[List[List[int]]],
+        endpoints: Optional[List[List[int]]],
         interpolation: InterpolationMode = InterpolationMode.NEAREST,
         fill: FillTypeJIT = None,
+        coefficients: Optional[List[float]] = None,
     ) -> Mask:
-        output = self._F.perspective_mask(self.as_subclass(torch.Tensor), perspective_coeffs, fill=fill)
+        output = self._F.perspective_mask(
+            self.as_subclass(torch.Tensor), startpoints, endpoints, fill=fill, coefficients=coefficients
+        )
         return Mask.wrap_like(self, output)

     def elastic(
...
@@ -166,12 +166,19 @@ class Video(_Feature):

     def perspective(
         self,
-        perspective_coeffs: List[float],
+        startpoints: Optional[List[List[int]]],
+        endpoints: Optional[List[List[int]]],
         interpolation: InterpolationMode = InterpolationMode.BILINEAR,
         fill: FillTypeJIT = None,
+        coefficients: Optional[List[float]] = None,
     ) -> Video:
         output = self._F.perspective_video(
-            self.as_subclass(torch.Tensor), perspective_coeffs, interpolation=interpolation, fill=fill
+            self.as_subclass(torch.Tensor),
+            startpoints,
+            endpoints,
+            interpolation=interpolation,
+            fill=fill,
+            coefficients=coefficients,
         )
         return Video.wrap_like(self, output)
...
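
Each feature class (`Image`, `Mask`, `Video`, `BoundingBox`, plus the `_Feature` base) now exposes the same method shape. A quick sketch of the feature-level call (assuming this branch's prototype `features` namespace; values are illustrative):

```python
import torch
from torchvision.prototype import features

identity = [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0]  # identity perspective

image = features.Image(torch.rand(3, 32, 32))
# The points are the leading positional arguments; pass None to use coefficients.
out = image.perspective(None, None, coefficients=identity)
assert isinstance(out, features.Image)
```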
@@ -524,15 +524,17 @@ class RandomPerspective(_RandomApplyTransform):
         startpoints = [[0, 0], [width - 1, 0], [width - 1, height - 1], [0, height - 1]]
         endpoints = [topleft, topright, botright, botleft]
         perspective_coeffs = _get_perspective_coeffs(startpoints, endpoints)
-        return dict(perspective_coeffs=perspective_coeffs)
+        return dict(coefficients=perspective_coeffs)

     def _transform(self, inpt: Any, params: Dict[str, Any]) -> Any:
         fill = self.fill[type(inpt)]
         return F.perspective(
             inpt,
-            **params,
+            None,
+            None,
             fill=fill,
             interpolation=self.interpolation,
+            **params,
         )
...
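
The transform thus samples point correspondences, converts them to coefficients in `_get_params`, and forwards them through `**params` while passing `None` for the points. A smoke-test sketch mirroring the updated unit test above (assumes the prototype `transforms` module):

```python
import torch
from torchvision.prototype import features, transforms

transform = transforms.RandomPerspective(distortion_scale=0.5, p=1.0)
image = features.Image(torch.rand(3, 32, 32))

# The sampled parameters now travel under the "coefficients" key.
params = transform._get_params([image])
assert "coefficients" in params and len(params["coefficients"]) == 8

# End to end, _transform issues F.perspective(inpt, None, None, ..., **params).
out = transform(image)
```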
@@ -11,6 +11,7 @@ from torchvision.transforms import functional_pil as _FP, functional_tensor as _FT
 from torchvision.transforms.functional import (
     _compute_resized_output_size as __compute_resized_output_size,
     _get_inverse_affine_matrix,
+    _get_perspective_coeffs,
     InterpolationMode,
     pil_modes_mapping,
     pil_to_tensor,
@@ -906,12 +907,32 @@ def crop(inpt: features.InputTypeJIT, top: int, left: int, height: int, width: int
     return crop_image_pil(inpt, top, left, height, width)


+def _perspective_coefficients(
+    startpoints: Optional[List[List[int]]],
+    endpoints: Optional[List[List[int]]],
+    coefficients: Optional[List[float]],
+) -> List[float]:
+    if coefficients is not None:
+        if startpoints is not None and endpoints is not None:
+            raise ValueError("The startpoints/endpoints and the coefficients shouldn't be defined concurrently.")
+        elif len(coefficients) != 8:
+            raise ValueError("Argument coefficients should have 8 float values")
+        return coefficients
+    elif startpoints is not None and endpoints is not None:
+        return _get_perspective_coeffs(startpoints, endpoints)
+    else:
+        raise ValueError("Either the startpoints/endpoints or the coefficients must have non `None` values.")
+
+
 def perspective_image_tensor(
     image: torch.Tensor,
-    perspective_coeffs: List[float],
+    startpoints: Optional[List[List[int]]],
+    endpoints: Optional[List[List[int]]],
     interpolation: InterpolationMode = InterpolationMode.BILINEAR,
     fill: features.FillTypeJIT = None,
+    coefficients: Optional[List[float]] = None,
 ) -> torch.Tensor:
+    perspective_coeffs = _perspective_coefficients(startpoints, endpoints, coefficients)
+
     if image.numel() == 0:
         return image
@@ -934,21 +955,24 @@ def perspective_image_tensor(
 @torch.jit.unused
 def perspective_image_pil(
     image: PIL.Image.Image,
-    perspective_coeffs: List[float],
+    startpoints: Optional[List[List[int]]],
+    endpoints: Optional[List[List[int]]],
     interpolation: InterpolationMode = InterpolationMode.BICUBIC,
     fill: features.FillTypeJIT = None,
+    coefficients: Optional[List[float]] = None,
 ) -> PIL.Image.Image:
+    perspective_coeffs = _perspective_coefficients(startpoints, endpoints, coefficients)
     return _FP.perspective(image, perspective_coeffs, interpolation=pil_modes_mapping[interpolation], fill=fill)


 def perspective_bounding_box(
     bounding_box: torch.Tensor,
     format: features.BoundingBoxFormat,
-    perspective_coeffs: List[float],
+    startpoints: Optional[List[List[int]]],
+    endpoints: Optional[List[List[int]]],
+    coefficients: Optional[List[float]] = None,
 ) -> torch.Tensor:
-    if len(perspective_coeffs) != 8:
-        raise ValueError("Argument perspective_coeffs should have 8 float values")
+    perspective_coeffs = _perspective_coefficients(startpoints, endpoints, coefficients)

     original_shape = bounding_box.shape
     bounding_box = (
@@ -1029,8 +1053,10 @@ def perspective_bounding_box(

 def perspective_mask(
     mask: torch.Tensor,
-    perspective_coeffs: List[float],
+    startpoints: Optional[List[List[int]]],
+    endpoints: Optional[List[List[int]]],
     fill: features.FillTypeJIT = None,
+    coefficients: Optional[List[float]] = None,
 ) -> torch.Tensor:
     if mask.ndim < 3:
         mask = mask.unsqueeze(0)
@@ -1039,7 +1065,7 @@ def perspective_mask(
         needs_squeeze = False

     output = perspective_image_tensor(
-        mask, perspective_coeffs=perspective_coeffs, interpolation=InterpolationMode.NEAREST, fill=fill
+        mask, startpoints, endpoints, interpolation=InterpolationMode.NEAREST, fill=fill, coefficients=coefficients
     )

     if needs_squeeze:
@@ -1050,25 +1076,37 @@ def perspective_mask(

 def perspective_video(
     video: torch.Tensor,
-    perspective_coeffs: List[float],
+    startpoints: Optional[List[List[int]]],
+    endpoints: Optional[List[List[int]]],
     interpolation: InterpolationMode = InterpolationMode.BILINEAR,
     fill: features.FillTypeJIT = None,
+    coefficients: Optional[List[float]] = None,
 ) -> torch.Tensor:
-    return perspective_image_tensor(video, perspective_coeffs, interpolation=interpolation, fill=fill)
+    return perspective_image_tensor(
+        video, startpoints, endpoints, interpolation=interpolation, fill=fill, coefficients=coefficients
+    )


 def perspective(
     inpt: features.InputTypeJIT,
-    perspective_coeffs: List[float],
+    startpoints: Optional[List[List[int]]],
+    endpoints: Optional[List[List[int]]],
     interpolation: InterpolationMode = InterpolationMode.BILINEAR,
     fill: features.FillTypeJIT = None,
+    coefficients: Optional[List[float]] = None,
 ) -> features.InputTypeJIT:
     if isinstance(inpt, torch.Tensor) and (torch.jit.is_scripting() or not isinstance(inpt, features._Feature)):
-        return perspective_image_tensor(inpt, perspective_coeffs, interpolation=interpolation, fill=fill)
+        return perspective_image_tensor(
+            inpt, startpoints, endpoints, interpolation=interpolation, fill=fill, coefficients=coefficients
+        )
     elif isinstance(inpt, features._Feature):
-        return inpt.perspective(perspective_coeffs, interpolation=interpolation, fill=fill)
+        return inpt.perspective(
+            startpoints, endpoints, interpolation=interpolation, fill=fill, coefficients=coefficients
+        )
     else:
-        return perspective_image_pil(inpt, perspective_coeffs, interpolation=interpolation, fill=fill)
+        return perspective_image_pil(
+            inpt, startpoints, endpoints, interpolation=interpolation, fill=fill, coefficients=coefficients
+        )


 def elastic_image_tensor(
...
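
The new `_perspective_coefficients` helper centralizes the argument validation shared by all kernels above. A sketch of the resulting error behavior through the public dispatcher (sample values are illustrative):

```python
import pytest
import torch
from torchvision.prototype.transforms import functional as F

img = torch.rand(3, 32, 32)
pts = [[0, 0], [31, 0], [31, 31], [0, 31]]
coeffs = [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0]

# Points and coefficients are mutually exclusive.
with pytest.raises(ValueError):
    F.perspective(img, pts, pts, coefficients=coeffs)

# At least one of the two specifications is required.
with pytest.raises(ValueError):
    F.perspective(img, None, None)

# Coefficients must be exactly 8 floats.
with pytest.raises(ValueError):
    F.perspective(img, None, None, coefficients=coeffs[:4])
```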