Unverified commit dc11b1f6, authored by Vasilis Vryniotis, committed by GitHub

[prototype] Restore BC on perspective (#6902)

* Restore BC on perspective

* Fixes linter

* Fixing tests.

* Apply code-review changes.

* Pleasing mypy.

* Revert named parameters.
parent 73206486
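For context: the stable `torchvision.transforms.functional.perspective` takes `startpoints`/`endpoints`, while the prototype API had replaced them with a single `perspective_coeffs` argument. This commit restores the stable signature and keeps the precomputed-coefficients path behind a new optional `coefficients` keyword. A minimal sketch of the two call styles after this change (prototype import path and values are illustrative, not from the diff):

```python
import torch
from torchvision.prototype.transforms import functional as F

img = torch.rand(3, 256, 256)

# BC path, matching the stable API: pass the four corner correspondences
# positionally; the 8 coefficients are derived internally.
startpoints = [[0, 0], [255, 0], [255, 255], [0, 255]]
endpoints = [[12, 5], [250, 8], [248, 245], [3, 250]]
out = F.perspective(img, startpoints, endpoints)

# Coefficients path: pass 8 precomputed values by keyword and set the
# positional startpoints/endpoints to None.
coefficients = [1.0, 0.1, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0]  # illustrative values
out = F.perspective(img, None, None, coefficients=coefficients)
```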
@@ -1184,38 +1184,38 @@ _PERSPECTIVE_COEFFS = [
 def sample_inputs_perspective_image_tensor():
     for image_loader in make_image_loaders(sizes=["random"]):
         for fill in [None, 128.0, 128, [12.0], [12.0 + c for c in range(image_loader.num_channels)]]:
-            yield ArgsKwargs(image_loader, fill=fill, perspective_coeffs=_PERSPECTIVE_COEFFS[0])
+            yield ArgsKwargs(image_loader, None, None, fill=fill, coefficients=_PERSPECTIVE_COEFFS[0])


 def reference_inputs_perspective_image_tensor():
-    for image_loader, perspective_coeffs in itertools.product(make_image_loaders(extra_dims=[()]), _PERSPECTIVE_COEFFS):
+    for image_loader, coefficients in itertools.product(make_image_loaders(extra_dims=[()]), _PERSPECTIVE_COEFFS):
         # FIXME: PIL kernel doesn't support sequences of length 1 if the number of channels is larger. Shouldn't it?
         for fill in [None, 128.0, 128, [12.0 + c for c in range(image_loader.num_channels)]]:
-            yield ArgsKwargs(image_loader, fill=fill, perspective_coeffs=perspective_coeffs)
+            yield ArgsKwargs(image_loader, None, None, fill=fill, coefficients=coefficients)


 def sample_inputs_perspective_bounding_box():
     for bounding_box_loader in make_bounding_box_loaders():
         yield ArgsKwargs(
-            bounding_box_loader, format=bounding_box_loader.format, perspective_coeffs=_PERSPECTIVE_COEFFS[0]
+            bounding_box_loader, bounding_box_loader.format, None, None, coefficients=_PERSPECTIVE_COEFFS[0]
         )


 def sample_inputs_perspective_mask():
     for mask_loader in make_mask_loaders(sizes=["random"]):
-        yield ArgsKwargs(mask_loader, perspective_coeffs=_PERSPECTIVE_COEFFS[0])
+        yield ArgsKwargs(mask_loader, None, None, coefficients=_PERSPECTIVE_COEFFS[0])


 def reference_inputs_perspective_mask():
     for mask_loader, perspective_coeffs in itertools.product(
         make_mask_loaders(extra_dims=[()], num_objects=[1]), _PERSPECTIVE_COEFFS
     ):
-        yield ArgsKwargs(mask_loader, perspective_coeffs=perspective_coeffs)
+        yield ArgsKwargs(mask_loader, None, None, coefficients=perspective_coeffs)


 def sample_inputs_perspective_video():
     for video_loader in make_video_loaders(sizes=["random"], num_frames=["random"]):
-        yield ArgsKwargs(video_loader, perspective_coeffs=_PERSPECTIVE_COEFFS[0])
+        yield ArgsKwargs(video_loader, None, None, coefficients=_PERSPECTIVE_COEFFS[0])


 KERNEL_INFOS.extend(
...
@@ -917,8 +917,8 @@ class TestRandomPerspective:
         params = transform._get_params([image])

         h, w = image.spatial_size
-        assert "perspective_coeffs" in params
-        assert len(params["perspective_coeffs"]) == 8
+        assert "coefficients" in params
+        assert len(params["coefficients"]) == 8

     @pytest.mark.parametrize("distortion_scale", [0.1, 0.7])
     def test__transform(self, distortion_scale, mocker):
@@ -940,7 +940,7 @@ class TestRandomPerspective:
         params = transform._get_params([inpt])

         fill = transforms._utils._convert_fill_arg(fill)
-        fn.assert_called_once_with(inpt, **params, fill=fill, interpolation=interpolation)
+        fn.assert_called_once_with(inpt, None, None, **params, fill=fill, interpolation=interpolation)


 class TestElasticTransform:
...
@@ -874,7 +874,9 @@ def test_correctness_perspective_bounding_box(device, startpoints, endpoints):
     output_bboxes = F.perspective_bounding_box(
         bboxes,
         bboxes_format,
-        perspective_coeffs=pcoeffs,
+        None,
+        None,
+        coefficients=pcoeffs,
     )

     if bboxes.ndim < 2:
...
@@ -169,11 +169,15 @@ class BoundingBox(_Feature):
     def perspective(
         self,
-        perspective_coeffs: List[float],
+        startpoints: Optional[List[List[int]]],
+        endpoints: Optional[List[List[int]]],
         interpolation: InterpolationMode = InterpolationMode.BILINEAR,
         fill: FillTypeJIT = None,
+        coefficients: Optional[List[float]] = None,
     ) -> BoundingBox:
-        output = self._F.perspective_bounding_box(self.as_subclass(torch.Tensor), self.format, perspective_coeffs)
+        output = self._F.perspective_bounding_box(
+            self.as_subclass(torch.Tensor), self.format, startpoints, endpoints, coefficients=coefficients
+        )
         return BoundingBox.wrap_like(self, output)

     def elastic(
...
@@ -218,9 +218,11 @@ class _Feature(torch.Tensor):
     def perspective(
         self,
-        perspective_coeffs: List[float],
+        startpoints: Optional[List[List[int]]],
+        endpoints: Optional[List[List[int]]],
         interpolation: InterpolationMode = InterpolationMode.BILINEAR,
         fill: FillTypeJIT = None,
+        coefficients: Optional[List[float]] = None,
     ) -> _Feature:
         return self
...
@@ -206,12 +206,19 @@ class Image(_Feature):
     def perspective(
         self,
-        perspective_coeffs: List[float],
+        startpoints: Optional[List[List[int]]],
+        endpoints: Optional[List[List[int]]],
         interpolation: InterpolationMode = InterpolationMode.BILINEAR,
         fill: FillTypeJIT = None,
+        coefficients: Optional[List[float]] = None,
     ) -> Image:
         output = self._F.perspective_image_tensor(
-            self.as_subclass(torch.Tensor), perspective_coeffs, interpolation=interpolation, fill=fill
+            self.as_subclass(torch.Tensor),
+            startpoints,
+            endpoints,
+            interpolation=interpolation,
+            fill=fill,
+            coefficients=coefficients,
         )
         return Image.wrap_like(self, output)
...
@@ -118,11 +118,15 @@ class Mask(_Feature):
     def perspective(
         self,
-        perspective_coeffs: List[float],
+        startpoints: Optional[List[List[int]]],
+        endpoints: Optional[List[List[int]]],
         interpolation: InterpolationMode = InterpolationMode.NEAREST,
         fill: FillTypeJIT = None,
+        coefficients: Optional[List[float]] = None,
     ) -> Mask:
-        output = self._F.perspective_mask(self.as_subclass(torch.Tensor), perspective_coeffs, fill=fill)
+        output = self._F.perspective_mask(
+            self.as_subclass(torch.Tensor), startpoints, endpoints, fill=fill, coefficients=coefficients
+        )
         return Mask.wrap_like(self, output)

     def elastic(
...
@@ -166,12 +166,19 @@ class Video(_Feature):
     def perspective(
         self,
-        perspective_coeffs: List[float],
+        startpoints: Optional[List[List[int]]],
+        endpoints: Optional[List[List[int]]],
         interpolation: InterpolationMode = InterpolationMode.BILINEAR,
         fill: FillTypeJIT = None,
+        coefficients: Optional[List[float]] = None,
     ) -> Video:
         output = self._F.perspective_video(
-            self.as_subclass(torch.Tensor), perspective_coeffs, interpolation=interpolation, fill=fill
+            self.as_subclass(torch.Tensor),
+            startpoints,
+            endpoints,
+            interpolation=interpolation,
+            fill=fill,
+            coefficients=coefficients,
         )
         return Video.wrap_like(self, output)
...
@@ -524,15 +524,17 @@ class RandomPerspective(_RandomApplyTransform):
         startpoints = [[0, 0], [width - 1, 0], [width - 1, height - 1], [0, height - 1]]
         endpoints = [topleft, topright, botright, botleft]
         perspective_coeffs = _get_perspective_coeffs(startpoints, endpoints)
-        return dict(perspective_coeffs=perspective_coeffs)
+        return dict(coefficients=perspective_coeffs)

     def _transform(self, inpt: Any, params: Dict[str, Any]) -> Any:
         fill = self.fill[type(inpt)]
         return F.perspective(
             inpt,
-            **params,
+            None,
+            None,
             fill=fill,
             interpolation=self.interpolation,
+            **params,
         )
...
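A detail of this hunk that is easy to miss: `**params` has to move behind the two new positional `None` placeholders because unpacked keyword arguments may be followed by keyword arguments but not by positional ones. A self-contained sketch of that syntax rule (the function and values here are purely illustrative):

```python
# Why **params moved after the positional placeholders in _transform.
def f(*args, **kwargs):
    return args, kwargs

params = {"coefficients": [0.0] * 8}  # shape of RandomPerspective._get_params output

f("inpt", None, None, fill=0, **params)    # fine: positionals precede **params
# f("inpt", **params, None, None, fill=0)  # SyntaxError: positional after ** unpacking
```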
@@ -11,6 +11,7 @@ from torchvision.transforms import functional_pil as _FP, functional_tensor as _
 from torchvision.transforms.functional import (
     _compute_resized_output_size as __compute_resized_output_size,
     _get_inverse_affine_matrix,
+    _get_perspective_coeffs,
     InterpolationMode,
     pil_modes_mapping,
     pil_to_tensor,
@@ -906,12 +907,32 @@ def crop(inpt: features.InputTypeJIT, top: int, left: int, height: int, width: i
     return crop_image_pil(inpt, top, left, height, width)


+def _perspective_coefficients(
+    startpoints: Optional[List[List[int]]],
+    endpoints: Optional[List[List[int]]],
+    coefficients: Optional[List[float]],
+) -> List[float]:
+    if coefficients is not None:
+        if startpoints is not None and endpoints is not None:
+            raise ValueError("The startpoints/endpoints and the coefficients shouldn't be defined concurrently.")
+        elif len(coefficients) != 8:
+            raise ValueError("Argument coefficients should have 8 float values")
+        return coefficients
+    elif startpoints is not None and endpoints is not None:
+        return _get_perspective_coeffs(startpoints, endpoints)
+    else:
+        raise ValueError("Either the startpoints/endpoints or the coefficients must have non `None` values.")
+
+
 def perspective_image_tensor(
     image: torch.Tensor,
-    perspective_coeffs: List[float],
+    startpoints: Optional[List[List[int]]],
+    endpoints: Optional[List[List[int]]],
     interpolation: InterpolationMode = InterpolationMode.BILINEAR,
     fill: features.FillTypeJIT = None,
+    coefficients: Optional[List[float]] = None,
 ) -> torch.Tensor:
+    perspective_coeffs = _perspective_coefficients(startpoints, endpoints, coefficients)
     if image.numel() == 0:
         return image
...
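The new `_perspective_coefficients` helper centralizes argument resolution for every kernel below. A quick sketch of its contract; it is a private helper, so the module path used here is an assumption for illustration only:

```python
# Assumed import path for the private helper shown in the hunk above.
from torchvision.prototype.transforms.functional._geometry import _perspective_coefficients

start = [[0, 0], [99, 0], [99, 99], [0, 99]]
end = [[3, 2], [96, 0], [99, 95], [0, 99]]

_perspective_coefficients(start, end, None)       # derives the 8 coefficients
_perspective_coefficients(None, None, [0.0] * 8)  # length-checked, returned as-is

# Each of these raises ValueError:
#   _perspective_coefficients(start, end, [0.0] * 8)  # both paths given at once
#   _perspective_coefficients(None, None, [0.0] * 4)  # not 8 values
#   _perspective_coefficients(None, None, None)       # neither path given
```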
@@ -934,21 +955,24 @@ def perspective_image_tensor(

 @torch.jit.unused
 def perspective_image_pil(
     image: PIL.Image.Image,
-    perspective_coeffs: List[float],
+    startpoints: Optional[List[List[int]]],
+    endpoints: Optional[List[List[int]]],
     interpolation: InterpolationMode = InterpolationMode.BICUBIC,
     fill: features.FillTypeJIT = None,
+    coefficients: Optional[List[float]] = None,
 ) -> PIL.Image.Image:
+    perspective_coeffs = _perspective_coefficients(startpoints, endpoints, coefficients)
     return _FP.perspective(image, perspective_coeffs, interpolation=pil_modes_mapping[interpolation], fill=fill)


 def perspective_bounding_box(
     bounding_box: torch.Tensor,
     format: features.BoundingBoxFormat,
-    perspective_coeffs: List[float],
+    startpoints: Optional[List[List[int]]],
+    endpoints: Optional[List[List[int]]],
+    coefficients: Optional[List[float]] = None,
 ) -> torch.Tensor:
-    if len(perspective_coeffs) != 8:
-        raise ValueError("Argument perspective_coeffs should have 8 float values")
+    perspective_coeffs = _perspective_coefficients(startpoints, endpoints, coefficients)

     original_shape = bounding_box.shape
     bounding_box = (
...
@@ -1029,8 +1053,10 @@ def perspective_bounding_box(

 def perspective_mask(
     mask: torch.Tensor,
-    perspective_coeffs: List[float],
+    startpoints: Optional[List[List[int]]],
+    endpoints: Optional[List[List[int]]],
     fill: features.FillTypeJIT = None,
+    coefficients: Optional[List[float]] = None,
 ) -> torch.Tensor:
     if mask.ndim < 3:
         mask = mask.unsqueeze(0)
@@ -1039,7 +1065,7 @@ def perspective_mask(
         needs_squeeze = False

     output = perspective_image_tensor(
-        mask, perspective_coeffs=perspective_coeffs, interpolation=InterpolationMode.NEAREST, fill=fill
+        mask, startpoints, endpoints, interpolation=InterpolationMode.NEAREST, fill=fill, coefficients=coefficients
     )

     if needs_squeeze:
...
@@ -1050,25 +1076,37 @@ def perspective_mask(

 def perspective_video(
     video: torch.Tensor,
-    perspective_coeffs: List[float],
+    startpoints: Optional[List[List[int]]],
+    endpoints: Optional[List[List[int]]],
     interpolation: InterpolationMode = InterpolationMode.BILINEAR,
     fill: features.FillTypeJIT = None,
+    coefficients: Optional[List[float]] = None,
 ) -> torch.Tensor:
-    return perspective_image_tensor(video, perspective_coeffs, interpolation=interpolation, fill=fill)
+    return perspective_image_tensor(
+        video, startpoints, endpoints, interpolation=interpolation, fill=fill, coefficients=coefficients
+    )


 def perspective(
     inpt: features.InputTypeJIT,
-    perspective_coeffs: List[float],
+    startpoints: Optional[List[List[int]]],
+    endpoints: Optional[List[List[int]]],
     interpolation: InterpolationMode = InterpolationMode.BILINEAR,
     fill: features.FillTypeJIT = None,
+    coefficients: Optional[List[float]] = None,
 ) -> features.InputTypeJIT:
     if isinstance(inpt, torch.Tensor) and (torch.jit.is_scripting() or not isinstance(inpt, features._Feature)):
-        return perspective_image_tensor(inpt, perspective_coeffs, interpolation=interpolation, fill=fill)
+        return perspective_image_tensor(
+            inpt, startpoints, endpoints, interpolation=interpolation, fill=fill, coefficients=coefficients
+        )
     elif isinstance(inpt, features._Feature):
-        return inpt.perspective(perspective_coeffs, interpolation=interpolation, fill=fill)
+        return inpt.perspective(
+            startpoints, endpoints, interpolation=interpolation, fill=fill, coefficients=coefficients
+        )
     else:
-        return perspective_image_pil(inpt, perspective_coeffs, interpolation=interpolation, fill=fill)
+        return perspective_image_pil(
+            inpt, startpoints, endpoints, interpolation=interpolation, fill=fill, coefficients=coefficients
+        )


 def elastic_image_tensor(
...
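With the dispatcher restored, all three input kinds route through the same signature. A brief usage sketch of the three branches, assuming the prototype import paths of this era (`torchvision.prototype.features`, `torchvision.prototype.transforms.functional`) and identity coefficients:

```python
import PIL.Image
import torch
from torchvision.prototype import features
from torchvision.prototype.transforms import functional as F

# Identity perspective: x' = (1*x + 0*y + 0) / (0*x + 0*y + 1), same for y'.
coeffs = [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0]

t = torch.rand(3, 32, 32)
out_tensor = F.perspective(t, None, None, coefficients=coeffs)    # -> perspective_image_tensor

img = features.Image(t)
out_image = F.perspective(img, None, None, coefficients=coeffs)   # -> Image.perspective

pil = PIL.Image.new("RGB", (32, 32))
out_pil = F.perspective(pil, None, None, coefficients=coeffs)     # -> perspective_image_pil
```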