"git@developer.sourcefind.cn:OpenDAS/torchaudio.git" did not exist on "93cc6da7771baf4c7beae0b6373efbe9dc16485d"
Unverified commit dc11b1f6, authored by Vasilis Vryniotis, committed by GitHub

[prototype] Restore BC on perspective (#6902)

* Restore BC on perspective
* Fix linter
* Fix tests
* Apply code-review changes
* Appease mypy
* Revert named parameters
parent 73206486
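
For context, this change restores the v1 `startpoints`/`endpoints` call convention on the v2 perspective kernels while keeping the coefficient-based one. A minimal sketch of the two conventions now accepted (assuming this branch's prototype namespace; the sample values are illustrative):

```python
import torch
from torchvision.prototype.transforms import functional as F

img = torch.randint(0, 256, (3, 32, 32), dtype=torch.uint8)

# v1-style (BC): derive the 8 coefficients from four point correspondences.
startpoints = [[0, 0], [31, 0], [31, 31], [0, 31]]
endpoints = [[2, 1], [30, 3], [29, 30], [1, 28]]
out = F.perspective(img, startpoints, endpoints)

# v2-style: pass precomputed coefficients and leave the points as None.
identity = [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0]  # x' = x, y' = y
out = F.perspective(img, None, None, coefficients=identity)
```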
@@ -1184,38 +1184,38 @@ _PERSPECTIVE_COEFFS = [
 def sample_inputs_perspective_image_tensor():
     for image_loader in make_image_loaders(sizes=["random"]):
         for fill in [None, 128.0, 128, [12.0], [12.0 + c for c in range(image_loader.num_channels)]]:
-            yield ArgsKwargs(image_loader, fill=fill, perspective_coeffs=_PERSPECTIVE_COEFFS[0])
+            yield ArgsKwargs(image_loader, None, None, fill=fill, coefficients=_PERSPECTIVE_COEFFS[0])


 def reference_inputs_perspective_image_tensor():
-    for image_loader, perspective_coeffs in itertools.product(make_image_loaders(extra_dims=[()]), _PERSPECTIVE_COEFFS):
+    for image_loader, coefficients in itertools.product(make_image_loaders(extra_dims=[()]), _PERSPECTIVE_COEFFS):
         # FIXME: PIL kernel doesn't support sequences of length 1 if the number of channels is larger. Shouldn't it?
         for fill in [None, 128.0, 128, [12.0 + c for c in range(image_loader.num_channels)]]:
-            yield ArgsKwargs(image_loader, fill=fill, perspective_coeffs=perspective_coeffs)
+            yield ArgsKwargs(image_loader, None, None, fill=fill, coefficients=coefficients)


 def sample_inputs_perspective_bounding_box():
     for bounding_box_loader in make_bounding_box_loaders():
         yield ArgsKwargs(
-            bounding_box_loader, format=bounding_box_loader.format, perspective_coeffs=_PERSPECTIVE_COEFFS[0]
+            bounding_box_loader, bounding_box_loader.format, None, None, coefficients=_PERSPECTIVE_COEFFS[0]
         )


 def sample_inputs_perspective_mask():
     for mask_loader in make_mask_loaders(sizes=["random"]):
-        yield ArgsKwargs(mask_loader, perspective_coeffs=_PERSPECTIVE_COEFFS[0])
+        yield ArgsKwargs(mask_loader, None, None, coefficients=_PERSPECTIVE_COEFFS[0])


 def reference_inputs_perspective_mask():
     for mask_loader, perspective_coeffs in itertools.product(
         make_mask_loaders(extra_dims=[()], num_objects=[1]), _PERSPECTIVE_COEFFS
     ):
-        yield ArgsKwargs(mask_loader, perspective_coeffs=perspective_coeffs)
+        yield ArgsKwargs(mask_loader, None, None, coefficients=perspective_coeffs)


 def sample_inputs_perspective_video():
     for video_loader in make_video_loaders(sizes=["random"], num_frames=["random"]):
-        yield ArgsKwargs(video_loader, perspective_coeffs=_PERSPECTIVE_COEFFS[0])
+        yield ArgsKwargs(video_loader, None, None, coefficients=_PERSPECTIVE_COEFFS[0])

 KERNEL_INFOS.extend(
...
@@ -917,8 +917,8 @@ class TestRandomPerspective:
         params = transform._get_params([image])

         h, w = image.spatial_size
-        assert "perspective_coeffs" in params
-        assert len(params["perspective_coeffs"]) == 8
+        assert "coefficients" in params
+        assert len(params["coefficients"]) == 8

     @pytest.mark.parametrize("distortion_scale", [0.1, 0.7])
     def test__transform(self, distortion_scale, mocker):
@@ -940,7 +940,7 @@ class TestRandomPerspective:
         params = transform._get_params([inpt])

         fill = transforms._utils._convert_fill_arg(fill)
-        fn.assert_called_once_with(inpt, **params, fill=fill, interpolation=interpolation)
+        fn.assert_called_once_with(inpt, None, None, **params, fill=fill, interpolation=interpolation)


 class TestElasticTransform:
...
@@ -874,7 +874,9 @@ def test_correctness_perspective_bounding_box(device, startpoints, endpoints):
     output_bboxes = F.perspective_bounding_box(
         bboxes,
         bboxes_format,
-        perspective_coeffs=pcoeffs,
+        None,
+        None,
+        coefficients=pcoeffs,
     )

     if bboxes.ndim < 2:
...
@@ -169,11 +169,15 @@ class BoundingBox(_Feature):

     def perspective(
         self,
-        perspective_coeffs: List[float],
+        startpoints: Optional[List[List[int]]],
+        endpoints: Optional[List[List[int]]],
         interpolation: InterpolationMode = InterpolationMode.BILINEAR,
         fill: FillTypeJIT = None,
+        coefficients: Optional[List[float]] = None,
     ) -> BoundingBox:
-        output = self._F.perspective_bounding_box(self.as_subclass(torch.Tensor), self.format, perspective_coeffs)
+        output = self._F.perspective_bounding_box(
+            self.as_subclass(torch.Tensor), self.format, startpoints, endpoints, coefficients=coefficients
+        )
         return BoundingBox.wrap_like(self, output)

     def elastic(
...
@@ -218,9 +218,11 @@ class _Feature(torch.Tensor):

     def perspective(
         self,
-        perspective_coeffs: List[float],
+        startpoints: Optional[List[List[int]]],
+        endpoints: Optional[List[List[int]]],
         interpolation: InterpolationMode = InterpolationMode.BILINEAR,
         fill: FillTypeJIT = None,
+        coefficients: Optional[List[float]] = None,
     ) -> _Feature:
         return self
...
@@ -206,12 +206,19 @@ class Image(_Feature):

     def perspective(
         self,
-        perspective_coeffs: List[float],
+        startpoints: Optional[List[List[int]]],
+        endpoints: Optional[List[List[int]]],
         interpolation: InterpolationMode = InterpolationMode.BILINEAR,
         fill: FillTypeJIT = None,
+        coefficients: Optional[List[float]] = None,
     ) -> Image:
         output = self._F.perspective_image_tensor(
-            self.as_subclass(torch.Tensor), perspective_coeffs, interpolation=interpolation, fill=fill
+            self.as_subclass(torch.Tensor),
+            startpoints,
+            endpoints,
+            interpolation=interpolation,
+            fill=fill,
+            coefficients=coefficients,
         )
         return Image.wrap_like(self, output)
...
@@ -118,11 +118,15 @@ class Mask(_Feature):

     def perspective(
         self,
-        perspective_coeffs: List[float],
+        startpoints: Optional[List[List[int]]],
+        endpoints: Optional[List[List[int]]],
         interpolation: InterpolationMode = InterpolationMode.NEAREST,
         fill: FillTypeJIT = None,
+        coefficients: Optional[List[float]] = None,
     ) -> Mask:
-        output = self._F.perspective_mask(self.as_subclass(torch.Tensor), perspective_coeffs, fill=fill)
+        output = self._F.perspective_mask(
+            self.as_subclass(torch.Tensor), startpoints, endpoints, fill=fill, coefficients=coefficients
+        )
         return Mask.wrap_like(self, output)

     def elastic(
...
@@ -166,12 +166,19 @@ class Video(_Feature):

     def perspective(
         self,
-        perspective_coeffs: List[float],
+        startpoints: Optional[List[List[int]]],
+        endpoints: Optional[List[List[int]]],
         interpolation: InterpolationMode = InterpolationMode.BILINEAR,
         fill: FillTypeJIT = None,
+        coefficients: Optional[List[float]] = None,
     ) -> Video:
         output = self._F.perspective_video(
-            self.as_subclass(torch.Tensor), perspective_coeffs, interpolation=interpolation, fill=fill
+            self.as_subclass(torch.Tensor),
+            startpoints,
+            endpoints,
+            interpolation=interpolation,
+            fill=fill,
+            coefficients=coefficients,
         )
         return Video.wrap_like(self, output)
...
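
Each feature class (`Image`, `Mask`, `Video`, `BoundingBox`, plus the `_Feature` base) now exposes the same method shape. A quick sketch of the feature-level call (assuming this branch's prototype `features` namespace; values are illustrative):

```python
import torch
from torchvision.prototype import features

identity = [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0]  # identity perspective

image = features.Image(torch.rand(3, 32, 32))
# The points are the leading positional arguments; pass None to use coefficients.
out = image.perspective(None, None, coefficients=identity)
assert isinstance(out, features.Image)
```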
@@ -524,15 +524,17 @@ class RandomPerspective(_RandomApplyTransform):
         startpoints = [[0, 0], [width - 1, 0], [width - 1, height - 1], [0, height - 1]]
         endpoints = [topleft, topright, botright, botleft]
         perspective_coeffs = _get_perspective_coeffs(startpoints, endpoints)
-        return dict(perspective_coeffs=perspective_coeffs)
+        return dict(coefficients=perspective_coeffs)

     def _transform(self, inpt: Any, params: Dict[str, Any]) -> Any:
         fill = self.fill[type(inpt)]
         return F.perspective(
             inpt,
-            **params,
+            None,
+            None,
             fill=fill,
             interpolation=self.interpolation,
+            **params,
         )
...
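
The transform thus samples point correspondences, converts them to coefficients in `_get_params`, and forwards them through `**params` while passing `None` for the points. A smoke-test sketch mirroring the updated unit test above (assumes the prototype `transforms` module):

```python
import torch
from torchvision.prototype import features, transforms

transform = transforms.RandomPerspective(distortion_scale=0.5, p=1.0)
image = features.Image(torch.rand(3, 32, 32))

# The sampled parameters now travel under the "coefficients" key.
params = transform._get_params([image])
assert "coefficients" in params and len(params["coefficients"]) == 8

# End to end, _transform issues F.perspective(inpt, None, None, ..., **params).
out = transform(image)
```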
@@ -11,6 +11,7 @@ from torchvision.transforms import functional_pil as _FP, functional_tensor as _FT
 from torchvision.transforms.functional import (
     _compute_resized_output_size as __compute_resized_output_size,
     _get_inverse_affine_matrix,
+    _get_perspective_coeffs,
     InterpolationMode,
     pil_modes_mapping,
     pil_to_tensor,
@@ -906,12 +907,32 @@ def crop(inpt: features.InputTypeJIT, top: int, left: int, height: int, width: int
     return crop_image_pil(inpt, top, left, height, width)


+def _perspective_coefficients(
+    startpoints: Optional[List[List[int]]],
+    endpoints: Optional[List[List[int]]],
+    coefficients: Optional[List[float]],
+) -> List[float]:
+    if coefficients is not None:
+        if startpoints is not None and endpoints is not None:
+            raise ValueError("The startpoints/endpoints and the coefficients shouldn't be defined concurrently.")
+        elif len(coefficients) != 8:
+            raise ValueError("Argument coefficients should have 8 float values")
+        return coefficients
+    elif startpoints is not None and endpoints is not None:
+        return _get_perspective_coeffs(startpoints, endpoints)
+    else:
+        raise ValueError("Either the startpoints/endpoints or the coefficients must have non `None` values.")
+
+
 def perspective_image_tensor(
     image: torch.Tensor,
-    perspective_coeffs: List[float],
+    startpoints: Optional[List[List[int]]],
+    endpoints: Optional[List[List[int]]],
     interpolation: InterpolationMode = InterpolationMode.BILINEAR,
     fill: features.FillTypeJIT = None,
+    coefficients: Optional[List[float]] = None,
 ) -> torch.Tensor:
+    perspective_coeffs = _perspective_coefficients(startpoints, endpoints, coefficients)
+
     if image.numel() == 0:
         return image
@@ -934,21 +955,24 @@ def perspective_image_tensor(
 @torch.jit.unused
 def perspective_image_pil(
     image: PIL.Image.Image,
-    perspective_coeffs: List[float],
+    startpoints: Optional[List[List[int]]],
+    endpoints: Optional[List[List[int]]],
     interpolation: InterpolationMode = InterpolationMode.BICUBIC,
     fill: features.FillTypeJIT = None,
+    coefficients: Optional[List[float]] = None,
 ) -> PIL.Image.Image:
+    perspective_coeffs = _perspective_coefficients(startpoints, endpoints, coefficients)
     return _FP.perspective(image, perspective_coeffs, interpolation=pil_modes_mapping[interpolation], fill=fill)


 def perspective_bounding_box(
     bounding_box: torch.Tensor,
     format: features.BoundingBoxFormat,
-    perspective_coeffs: List[float],
+    startpoints: Optional[List[List[int]]],
+    endpoints: Optional[List[List[int]]],
+    coefficients: Optional[List[float]] = None,
 ) -> torch.Tensor:
-    if len(perspective_coeffs) != 8:
-        raise ValueError("Argument perspective_coeffs should have 8 float values")
+    perspective_coeffs = _perspective_coefficients(startpoints, endpoints, coefficients)

     original_shape = bounding_box.shape
     bounding_box = (
@@ -1029,8 +1053,10 @@ def perspective_bounding_box(

 def perspective_mask(
     mask: torch.Tensor,
-    perspective_coeffs: List[float],
+    startpoints: Optional[List[List[int]]],
+    endpoints: Optional[List[List[int]]],
     fill: features.FillTypeJIT = None,
+    coefficients: Optional[List[float]] = None,
 ) -> torch.Tensor:
     if mask.ndim < 3:
         mask = mask.unsqueeze(0)
@@ -1039,7 +1065,7 @@ def perspective_mask(
         needs_squeeze = False

     output = perspective_image_tensor(
-        mask, perspective_coeffs=perspective_coeffs, interpolation=InterpolationMode.NEAREST, fill=fill
+        mask, startpoints, endpoints, interpolation=InterpolationMode.NEAREST, fill=fill, coefficients=coefficients
     )

     if needs_squeeze:
@@ -1050,25 +1076,37 @@ def perspective_mask(

 def perspective_video(
     video: torch.Tensor,
-    perspective_coeffs: List[float],
+    startpoints: Optional[List[List[int]]],
+    endpoints: Optional[List[List[int]]],
     interpolation: InterpolationMode = InterpolationMode.BILINEAR,
     fill: features.FillTypeJIT = None,
+    coefficients: Optional[List[float]] = None,
 ) -> torch.Tensor:
-    return perspective_image_tensor(video, perspective_coeffs, interpolation=interpolation, fill=fill)
+    return perspective_image_tensor(
+        video, startpoints, endpoints, interpolation=interpolation, fill=fill, coefficients=coefficients
+    )


 def perspective(
     inpt: features.InputTypeJIT,
-    perspective_coeffs: List[float],
+    startpoints: Optional[List[List[int]]],
+    endpoints: Optional[List[List[int]]],
     interpolation: InterpolationMode = InterpolationMode.BILINEAR,
     fill: features.FillTypeJIT = None,
+    coefficients: Optional[List[float]] = None,
 ) -> features.InputTypeJIT:
     if isinstance(inpt, torch.Tensor) and (torch.jit.is_scripting() or not isinstance(inpt, features._Feature)):
-        return perspective_image_tensor(inpt, perspective_coeffs, interpolation=interpolation, fill=fill)
+        return perspective_image_tensor(
+            inpt, startpoints, endpoints, interpolation=interpolation, fill=fill, coefficients=coefficients
+        )
     elif isinstance(inpt, features._Feature):
-        return inpt.perspective(perspective_coeffs, interpolation=interpolation, fill=fill)
+        return inpt.perspective(
+            startpoints, endpoints, interpolation=interpolation, fill=fill, coefficients=coefficients
+        )
     else:
-        return perspective_image_pil(inpt, perspective_coeffs, interpolation=interpolation, fill=fill)
+        return perspective_image_pil(
+            inpt, startpoints, endpoints, interpolation=interpolation, fill=fill, coefficients=coefficients
+        )


 def elastic_image_tensor(
...
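
The new `_perspective_coefficients` helper centralizes the argument validation shared by all kernels above. A sketch of the resulting error behavior through the public dispatcher (sample values are illustrative):

```python
import pytest
import torch
from torchvision.prototype.transforms import functional as F

img = torch.rand(3, 32, 32)
pts = [[0, 0], [31, 0], [31, 31], [0, 31]]
coeffs = [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0]

# Points and coefficients are mutually exclusive.
with pytest.raises(ValueError):
    F.perspective(img, pts, pts, coefficients=coeffs)

# At least one of the two specifications is required.
with pytest.raises(ValueError):
    F.perspective(img, None, None)

# Coefficients must be exactly 8 floats.
with pytest.raises(ValueError):
    F.perspective(img, None, None, coefficients=coeffs[:4])
```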