"vscode:/vscode.git/clone" did not exist on "969276eb1da99d40ddb18daf075c9576212ba2eb"
Unverified Commit 37081ee6 authored by Nicolas Hug's avatar Nicolas Hug Committed by GitHub
Browse files

Revamp transforms doc (#7859)


Co-authored-by: Philip Meier <github.pmeier@posteo.de>
parent 2c44ebae
@@ -8,6 +8,7 @@ from torchvision.transforms import functional as _F
 @torch.jit.unused
 def to_tensor(inpt: Any) -> torch.Tensor:
+    """[BETA] [DEPRECATED] Use to_image() and to_dtype() instead."""
     warnings.warn(
         "The function `to_tensor(...)` is deprecated and will be removed in a future release. "
         "Instead, please use `to_image(...)` followed by `to_dtype(..., dtype=torch.float32, scale=True)`."
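The warning above maps old calls onto the new pair of functionals; a minimal migration sketch (the PIL input is a stand-in, not part of this diff):

    import torch
    import PIL.Image
    from torchvision.transforms.v2 import functional as F

    pil_image = PIL.Image.new("RGB", (32, 32))  # stand-in input
    # Before: tensor = F.to_tensor(pil_image)
    image = F.to_image(pil_image)                                # uint8 Image datapoint
    tensor = F.to_dtype(image, dtype=torch.float32, scale=True)  # float32, values in [0, 1]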
@@ -23,7 +23,7 @@ from torchvision.transforms.functional import (
 from torchvision.utils import _log_api_usage_once
-from ._meta import _get_size_image_pil, clamp_bounding_boxes, convert_format_bounding_boxes
+from ._meta import _get_size_image_pil, clamp_bounding_boxes, convert_bounding_box_format
 from ._utils import _FillTypeJIT, _get_kernel, _register_five_ten_crop_kernel_internal, _register_kernel_internal
@@ -40,6 +40,7 @@ def _check_interpolation(interpolation: Union[InterpolationMode, int]) -> InterpolationMode:
 def horizontal_flip(inpt: torch.Tensor) -> torch.Tensor:
+    """[BETA] See :class:`~torchvision.transforms.v2.RandomHorizontalFlip` for details."""
     if torch.jit.is_scripting():
         return horizontal_flip_image(inpt)
@@ -96,6 +97,7 @@ def horizontal_flip_video(video: torch.Tensor) -> torch.Tensor:
 def vertical_flip(inpt: torch.Tensor) -> torch.Tensor:
+    """[BETA] See :class:`~torchvision.transforms.v2.RandomVerticalFlip` for details."""
     if torch.jit.is_scripting():
         return vertical_flip_image(inpt)
@@ -177,6 +179,7 @@ def resize(
     max_size: Optional[int] = None,
     antialias: Optional[Union[str, bool]] = "warn",
 ) -> torch.Tensor:
+    """[BETA] See :class:`~torchvision.transforms.v2.Resize` for details."""
     if torch.jit.is_scripting():
         return resize_image(inpt, size=size, interpolation=interpolation, max_size=max_size, antialias=antialias)
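A small usage sketch of resize(); passing antialias explicitly opts out of the "warn" sentinel default (the exact warning behavior is assumed, not shown in this diff):

    import torch
    from torchvision.transforms.v2 import functional as F

    img = torch.randint(0, 256, (3, 100, 150), dtype=torch.uint8)
    out = F.resize(img, size=[64, 96], antialias=True)  # explicit value instead of the "warn" sentinel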
@@ -373,6 +376,7 @@ def affine(
     fill: _FillTypeJIT = None,
     center: Optional[List[float]] = None,
 ) -> torch.Tensor:
+    """[BETA] See :class:`~torchvision.transforms.v2.RandomAffine` for details."""
     if torch.jit.is_scripting():
         return affine_image(
             inpt,
@@ -744,7 +748,7 @@ def _affine_bounding_boxes_with_expand(
     dtype = bounding_boxes.dtype
     device = bounding_boxes.device
     bounding_boxes = (
-        convert_format_bounding_boxes(
+        convert_bounding_box_format(
             bounding_boxes, old_format=format, new_format=datapoints.BoundingBoxFormat.XYXY, inplace=True
         )
     ).reshape(-1, 4)
@@ -805,7 +809,7 @@ def _affine_bounding_boxes_with_expand(
         canvas_size = (new_height, new_width)
     out_bboxes = clamp_bounding_boxes(out_bboxes, format=datapoints.BoundingBoxFormat.XYXY, canvas_size=canvas_size)
-    out_bboxes = convert_format_bounding_boxes(
+    out_bboxes = convert_bounding_box_format(
         out_bboxes, old_format=datapoints.BoundingBoxFormat.XYXY, new_format=format, inplace=True
     ).reshape(original_shape)
@@ -946,6 +950,7 @@ def rotate(
     center: Optional[List[float]] = None,
     fill: _FillTypeJIT = None,
 ) -> torch.Tensor:
+    """[BETA] See :class:`~torchvision.transforms.v2.RandomRotation` for details."""
     if torch.jit.is_scripting():
         return rotate_image(inpt, angle=angle, interpolation=interpolation, expand=expand, fill=fill, center=center)
@@ -1118,6 +1123,7 @@ def pad(
     fill: Optional[Union[int, float, List[float]]] = None,
     padding_mode: str = "constant",
 ) -> torch.Tensor:
+    """[BETA] See :class:`~torchvision.transforms.v2.Pad` for details."""
     if torch.jit.is_scripting():
         return pad_image(inpt, padding=padding, fill=fill, padding_mode=padding_mode)
@@ -1333,6 +1339,7 @@ def pad_video(
 def crop(inpt: torch.Tensor, top: int, left: int, height: int, width: int) -> torch.Tensor:
+    """[BETA] See :class:`~torchvision.transforms.v2.RandomCrop` for details."""
     if torch.jit.is_scripting():
         return crop_image(inpt, top=top, left=left, height=height, width=width)
@@ -1426,6 +1433,7 @@ def perspective(
     fill: _FillTypeJIT = None,
     coefficients: Optional[List[float]] = None,
 ) -> torch.Tensor:
+    """[BETA] See :class:`~torchvision.transforms.v2.RandomPerspective` for details."""
     if torch.jit.is_scripting():
         return perspective_image(
             inpt,
@@ -1572,9 +1580,9 @@ def perspective_bounding_boxes(
     perspective_coeffs = _perspective_coefficients(startpoints, endpoints, coefficients)
     original_shape = bounding_boxes.shape
-    # TODO: first cast to float if bbox is int64 before convert_format_bounding_boxes
+    # TODO: first cast to float if bbox is int64 before convert_bounding_box_format
     bounding_boxes = (
-        convert_format_bounding_boxes(bounding_boxes, old_format=format, new_format=datapoints.BoundingBoxFormat.XYXY)
+        convert_bounding_box_format(bounding_boxes, old_format=format, new_format=datapoints.BoundingBoxFormat.XYXY)
     ).reshape(-1, 4)
     dtype = bounding_boxes.dtype if torch.is_floating_point(bounding_boxes) else torch.float32
@@ -1647,7 +1655,7 @@ def perspective_bounding_boxes(
     # out_bboxes should be of shape [N boxes, 4]
-    return convert_format_bounding_boxes(
+    return convert_bounding_box_format(
         out_bboxes, old_format=datapoints.BoundingBoxFormat.XYXY, new_format=format, inplace=True
     ).reshape(original_shape)
@@ -1733,6 +1741,7 @@ def elastic(
     interpolation: Union[InterpolationMode, int] = InterpolationMode.BILINEAR,
     fill: _FillTypeJIT = None,
 ) -> torch.Tensor:
+    """[BETA] See :class:`~torchvision.transforms.v2.ElasticTransform` for details."""
     if torch.jit.is_scripting():
         return elastic_image(inpt, displacement=displacement, interpolation=interpolation, fill=fill)
@@ -1853,9 +1862,9 @@ def elastic_bounding_boxes(
         displacement = displacement.to(dtype=dtype, device=device)
     original_shape = bounding_boxes.shape
-    # TODO: first cast to float if bbox is int64 before convert_format_bounding_boxes
+    # TODO: first cast to float if bbox is int64 before convert_bounding_box_format
     bounding_boxes = (
-        convert_format_bounding_boxes(bounding_boxes, old_format=format, new_format=datapoints.BoundingBoxFormat.XYXY)
+        convert_bounding_box_format(bounding_boxes, old_format=format, new_format=datapoints.BoundingBoxFormat.XYXY)
     ).reshape(-1, 4)
     id_grid = _create_identity_grid(canvas_size, device=device, dtype=dtype)
@@ -1882,7 +1891,7 @@ def elastic_bounding_boxes(
         canvas_size=canvas_size,
     )
-    return convert_format_bounding_boxes(
+    return convert_bounding_box_format(
         out_bboxes, old_format=datapoints.BoundingBoxFormat.XYXY, new_format=format, inplace=True
     ).reshape(original_shape)
@@ -1935,6 +1944,7 @@ def elastic_video(
 def center_crop(inpt: torch.Tensor, output_size: List[int]) -> torch.Tensor:
+    """[BETA] See :class:`~torchvision.transforms.v2.CenterCrop` for details."""
     if torch.jit.is_scripting():
         return center_crop_image(inpt, output_size=output_size)
@@ -2063,6 +2073,7 @@ def resized_crop(
     interpolation: Union[InterpolationMode, int] = InterpolationMode.BILINEAR,
     antialias: Optional[Union[str, bool]] = "warn",
 ) -> torch.Tensor:
+    """[BETA] See :class:`~torchvision.transforms.v2.RandomResizedCrop` for details."""
     if torch.jit.is_scripting():
         return resized_crop_image(
             inpt,
@@ -2207,6 +2218,7 @@ def resized_crop_video(
 def five_crop(
     inpt: torch.Tensor, size: List[int]
 ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]:
+    """[BETA] See :class:`~torchvision.transforms.v2.FiveCrop` for details."""
     if torch.jit.is_scripting():
         return five_crop_image(inpt, size=size)
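The five-tensor return annotation above is easiest to read in a sketch (crop ordering follows torchvision's FiveCrop convention):

    import torch
    from torchvision.transforms.v2 import functional as F

    img = torch.rand(3, 64, 64)
    # Order per FiveCrop: top-left, top-right, bottom-left, bottom-right, center
    top_left, top_right, bottom_left, bottom_right, center = F.five_crop(img, size=[32, 32])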
@@ -2290,6 +2302,7 @@ def ten_crop(
     torch.Tensor,
     torch.Tensor,
 ]:
+    """[BETA] See :class:`~torchvision.transforms.v2.TenCrop` for details."""
     if torch.jit.is_scripting():
         return ten_crop_image(inpt, size=size, vertical_flip=vertical_flip)
@@ -176,7 +176,7 @@ def _xyxy_to_cxcywh(xyxy: torch.Tensor, inplace: bool) -> torch.Tensor:
     return xyxy
-def _convert_format_bounding_boxes(
+def _convert_bounding_box_format(
     bounding_boxes: torch.Tensor, old_format: BoundingBoxFormat, new_format: BoundingBoxFormat, inplace: bool = False
 ) -> torch.Tensor:
@@ -197,30 +197,31 @@ def _convert_format_bounding_boxes(
     return bounding_boxes
-def convert_format_bounding_boxes(
+def convert_bounding_box_format(
     inpt: torch.Tensor,
     old_format: Optional[BoundingBoxFormat] = None,
     new_format: Optional[BoundingBoxFormat] = None,
     inplace: bool = False,
 ) -> torch.Tensor:
+    """[BETA] See :class:`~torchvision.transforms.v2.ConvertBoundingBoxFormat` for details."""
     # This being a kernel / functional hybrid, we need an option to pass `old_format` explicitly for pure tensor
     # inputs as well as extract it from `datapoints.BoundingBoxes` inputs. However, putting a default value on
     # `old_format` means we also need to put one on `new_format` to have syntactically correct Python. Here we mimic the
     # default error that would be thrown if `new_format` had no default value.
     if new_format is None:
-        raise TypeError("convert_format_bounding_boxes() missing 1 required argument: 'new_format'")
+        raise TypeError("convert_bounding_box_format() missing 1 required argument: 'new_format'")
     if not torch.jit.is_scripting():
-        _log_api_usage_once(convert_format_bounding_boxes)
+        _log_api_usage_once(convert_bounding_box_format)
     if torch.jit.is_scripting() or is_pure_tensor(inpt):
         if old_format is None:
             raise ValueError("For pure tensor inputs, `old_format` has to be passed.")
-        return _convert_format_bounding_boxes(inpt, old_format=old_format, new_format=new_format, inplace=inplace)
+        return _convert_bounding_box_format(inpt, old_format=old_format, new_format=new_format, inplace=inplace)
     elif isinstance(inpt, datapoints.BoundingBoxes):
         if old_format is not None:
             raise ValueError("For bounding box datapoint inputs, `old_format` must not be passed.")
-        output = _convert_format_bounding_boxes(
+        output = _convert_bounding_box_format(
             inpt.as_subclass(torch.Tensor), old_format=inpt.format, new_format=new_format, inplace=inplace
         )
         return datapoints.wrap(output, like=inpt, format=new_format)
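A usage sketch of the renamed functional, following the dispatch rules above (pure tensors require old_format, BoundingBoxes datapoints must not pass it):

    import torch
    from torchvision import datapoints
    from torchvision.transforms.v2 import functional as F

    xyxy = torch.tensor([[10.0, 10.0, 20.0, 30.0]])

    # Pure tensor input: old_format is mandatory.
    cxcywh = F.convert_bounding_box_format(
        xyxy,
        old_format=datapoints.BoundingBoxFormat.XYXY,
        new_format=datapoints.BoundingBoxFormat.CXCYWH,
    )

    # BoundingBoxes datapoint: the format is read from the input instead.
    boxes = datapoints.BoundingBoxes(
        xyxy, format=datapoints.BoundingBoxFormat.XYXY, canvas_size=(40, 40)
    )
    converted = F.convert_bounding_box_format(
        boxes, new_format=datapoints.BoundingBoxFormat.CXCYWH
    )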
@@ -237,12 +238,12 @@ def _clamp_bounding_boxes(
     # BoundingBoxFormat instead of converting back and forth
     in_dtype = bounding_boxes.dtype
     bounding_boxes = bounding_boxes.clone() if bounding_boxes.is_floating_point() else bounding_boxes.float()
-    xyxy_boxes = convert_format_bounding_boxes(
+    xyxy_boxes = convert_bounding_box_format(
         bounding_boxes, old_format=format, new_format=datapoints.BoundingBoxFormat.XYXY, inplace=True
     )
     xyxy_boxes[..., 0::2].clamp_(min=0, max=canvas_size[1])
     xyxy_boxes[..., 1::2].clamp_(min=0, max=canvas_size[0])
-    out_boxes = convert_format_bounding_boxes(
+    out_boxes = convert_bounding_box_format(
         xyxy_boxes, old_format=BoundingBoxFormat.XYXY, new_format=format, inplace=True
     )
     return out_boxes.to(in_dtype)
@@ -253,6 +254,7 @@ def clamp_bounding_boxes(
     format: Optional[BoundingBoxFormat] = None,
     canvas_size: Optional[Tuple[int, int]] = None,
 ) -> torch.Tensor:
+    """[BETA] See :class:`~torchvision.transforms.v2.ClampBoundingBoxes` for details."""
     if not torch.jit.is_scripting():
         _log_api_usage_once(clamp_bounding_boxes)
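A short sketch of clamp_bounding_boxes(), mirroring the kernel above (coordinates are clipped to the canvas; canvas_size is (height, width)):

    import torch
    from torchvision import datapoints
    from torchvision.transforms.v2 import functional as F

    boxes = datapoints.BoundingBoxes(
        torch.tensor([[-5.0, 10.0, 60.0, 30.0]]),
        format=datapoints.BoundingBoxFormat.XYXY,
        canvas_size=(32, 48),  # (height, width)
    )
    clamped = F.clamp_bounding_boxes(boxes)  # -> [[0., 10., 48., 30.]]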
@@ -20,6 +20,7 @@ def normalize(
     std: List[float],
     inplace: bool = False,
 ) -> torch.Tensor:
+    """[BETA] See :class:`~torchvision.transforms.v2.Normalize` for details."""
     if torch.jit.is_scripting():
         return normalize_image(inpt, mean=mean, std=std, inplace=inplace)
@@ -70,6 +71,7 @@ def normalize_video(video: torch.Tensor, mean: List[float], std: List[float], inplace: bool = False) -> torch.Tensor:
 def gaussian_blur(inpt: torch.Tensor, kernel_size: List[int], sigma: Optional[List[float]] = None) -> torch.Tensor:
+    """[BETA] See :class:`~torchvision.transforms.v2.GaussianBlur` for details."""
     if torch.jit.is_scripting():
         return gaussian_blur_image(inpt, kernel_size=kernel_size, sigma=sigma)
@@ -178,6 +180,7 @@ def gaussian_blur_video(
 def to_dtype(inpt: torch.Tensor, dtype: torch.dtype = torch.float, scale: bool = False) -> torch.Tensor:
+    """[BETA] See :class:`~torchvision.transforms.v2.ToDtype` for details."""
     if torch.jit.is_scripting():
         return to_dtype_image(inpt, dtype=dtype, scale=scale)
@@ -258,6 +261,7 @@ def to_dtype_image(image: torch.Tensor, dtype: torch.dtype = torch.float, scale: bool = False) -> torch.Tensor:
 # We encourage users to use to_dtype() instead but we keep this for BC
 def convert_image_dtype(image: torch.Tensor, dtype: torch.dtype = torch.float32) -> torch.Tensor:
+    """[BETA] [DEPRECATED] Use to_dtype() instead."""
     return to_dtype_image(image, dtype=dtype, scale=True)
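A sketch contrasting the two scale modes of to_dtype() (convert_image_dtype above is the backwards-compatibility shim for the scale=True path):

    import torch
    from torchvision.transforms.v2 import functional as F

    img = torch.randint(0, 256, (3, 4, 4), dtype=torch.uint8)
    scaled = F.to_dtype(img, dtype=torch.float32, scale=True)  # values rescaled to [0.0, 1.0]
    cast = F.to_dtype(img, dtype=torch.float32, scale=False)   # same values, dtype cast only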
@@ -8,6 +8,7 @@ from ._utils import _get_kernel, _register_kernel_internal
 def uniform_temporal_subsample(inpt: torch.Tensor, num_samples: int) -> torch.Tensor:
+    """[BETA] See :class:`~torchvision.transforms.v2.UniformTemporalSubsample` for details."""
     if torch.jit.is_scripting():
         return uniform_temporal_subsample_video(inpt, num_samples=num_samples)
@@ -9,6 +9,7 @@ from torchvision.transforms import functional as _F
 @torch.jit.unused
 def to_image(inpt: Union[torch.Tensor, PIL.Image.Image, np.ndarray]) -> datapoints.Image:
+    """[BETA] See :class:`~torchvision.transforms.v2.ToImage` for details."""
     if isinstance(inpt, np.ndarray):
         output = torch.from_numpy(inpt).permute((2, 0, 1)).contiguous()
     elif isinstance(inpt, PIL.Image.Image):
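The ndarray branch above permutes HWC to CHW; a quick sketch:

    import numpy as np
    from torchvision.transforms.v2 import functional as F

    arr = np.zeros((32, 32, 3), dtype=np.uint8)  # HWC numpy array
    img = F.to_image(arr)                        # datapoints.Image in CHW layout
    print(img.shape)                             # torch.Size([3, 32, 32])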
@@ -67,7 +67,7 @@ _BUILTIN_DATAPOINT_TYPES = {
 def register_kernel(functional, datapoint_cls):
-    """Decorate a kernel to register it for a functional and a (custom) datapoint type.
+    """[BETA] Decorate a kernel to register it for a functional and a (custom) datapoint type.
     See :ref:`sphx_glr_auto_examples_v2_transforms_plot_custom_datapoints.py` for usage
     details.
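A hedged sketch of registering a kernel for a custom datapoint (MyDatapoint and the kernel body are hypothetical; the unwrap/rewrap pattern mirrors the kernels in this diff):

    import torch
    from torchvision import datapoints
    from torchvision.transforms.v2 import functional as F

    class MyDatapoint(datapoints.Datapoint):  # hypothetical custom datapoint type
        pass

    @F.register_kernel(F.horizontal_flip, MyDatapoint)
    def horizontal_flip_my_datapoint(inpt: MyDatapoint) -> MyDatapoint:
        # Reuse the plain-tensor kernel, then rewrap into the custom type.
        out = F.horizontal_flip_image(inpt.as_subclass(torch.Tensor))
        return datapoints.wrap(out, like=inpt)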