Various doc enhancements (#7326)

Co-authored-by: Philip Meier <github.pmeier@posteo.de> Co-authored-by: vfdev <vfdev.5@gmail.com>

Various doc enhancements (#7326)
Co-authored-by: Philip Meier <github.pmeier@posteo.de> Co-authored-by: vfdev <vfdev.5@gmail.com>
877ffd9f · Nicolas Hug · GitHub · a376f797 · 877ffd9f · 877ffd9f
Unverified Commit 877ffd9f authored Feb 24, 2023 by Nicolas Hug Committed by GitHub Feb 24, 2023
10 changed files
--- a/docs/source/conf.py
+++ b/docs/source/conf.py
@@ -34,6 +34,7 @@ from tabulate import tabulate
 sys.path.append(os.path.abspath("."))

 torchvision.disable_beta_transforms_warning()
+import torchvision.datapoints  # Don't remove, otherwise the docs for datapoints aren't linked properly

 # -- General configuration ------------------------------------------------


--- a/docs/source/datapoints.rst
+++ b/docs/source/datapoints.rst
@@ -2,6 +2,12 @@ Datapoints
 ==========

 .. currentmodule:: torchvision.datapoints
+
+Datapoints are tensor subclasses which the :mod:`~torchvision.transforms.v2` transforms use under the hood to
+dispatch their inputs to the appropriate lower-level kernels. Most users do not
+need to manipulate datapoints directly and can simply rely on dataset wrapping -
+see e.g. :ref:`sphx_glr_auto_examples_plot_transforms_v2_e2e.py`.
+
 .. autosummary::
    :toctree: generated/
    :template: class.rst

--- a/docs/source/index.rst
+++ b/docs/source/index.rst
@@ -31,8 +31,8 @@ architectures, and common image transformations for computer vision.
   :maxdepth: 2
   :caption: Package Reference

-   datapoints
   transforms
+   datapoints
   models
   datasets
   utils

--- a/docs/source/transforms.rst
+++ b/docs/source/transforms.rst
@@ -198,6 +198,12 @@ Miscellaneous
 Conversion
 ----------

+.. note::
+    Beware, some of these conversion transforms below will scale the values
+    while performing the conversion, while some may not do any scaling. By
+    scaling, we mean e.g. that a ``uint8`` -> ``float32`` conversion would map
+    the [0, 255] range into [0, 1] (and vice-versa).
+
 .. autosummary::
    :toctree: generated/
    :template: class.rst
@@ -211,8 +217,8 @@ Conversion
    v2.PILToTensor
    v2.ToImageTensor
    ConvertImageDtype
-    v2.ConvertImageDtype
    v2.ConvertDtype
+    v2.ConvertImageDtype
    v2.ToDtype
    v2.ConvertBoundingBoxFormat


--- a/torchvision/transforms/transforms.py
+++ b/torchvision/transforms/transforms.py
@@ -105,7 +105,9 @@ class Compose:


 class ToTensor:
-    """Convert a ``PIL Image`` or ``numpy.ndarray`` to tensor. This transform does not support torchscript.
+    """Convert a PIL Image or ndarray to tensor and scale the values accordingly.
+
+    This transform does not support torchscript.

    Converts a PIL Image or numpy.ndarray (H x W x C) in the range
    [0, 255] to a torch.FloatTensor of shape (C x H x W) in the range [0.0, 1.0]
@@ -139,7 +141,9 @@ class ToTensor:


 class PILToTensor:
-    """Convert a ``PIL Image`` to a tensor of the same type. This transform does not support torchscript.
+    """Convert a PIL Image to a tensor of the same type - this does not scale values.
+
+    This transform does not support torchscript.

    Converts a PIL Image (H x W x C) to a Tensor of shape (C x H x W).
    """
@@ -166,7 +170,8 @@ class PILToTensor:


 class ConvertImageDtype(torch.nn.Module):
-    """Convert a tensor image to the given ``dtype`` and scale the values accordingly
+    """Convert a tensor image to the given ``dtype`` and scale the values accordingly.
+
    This function does not support PIL Image.

    Args:
@@ -194,7 +199,9 @@ class ConvertImageDtype(torch.nn.Module):


 class ToPILImage:
-    """Convert a tensor or an ndarray to PIL Image. This transform does not support torchscript.
+    """Convert a tensor or an ndarray to PIL Image - this does not scale values.
+
+    This transform does not support torchscript.

    Converts a torch.*Tensor of shape C x H x W or a numpy ndarray of shape
    H x W x C to a PIL Image while preserving the value range.

--- a/torchvision/transforms/v2/_container.py
+++ b/torchvision/transforms/v2/_container.py
@@ -138,9 +138,7 @@ class RandomChoice(Transform):
        if p is None:
            p = [1] * len(transforms)
        elif len(p) != len(transforms):
-            raise ValueError(
-                f"Length of p doesn't match the number of transforms: " f"{len(p)} != {len(transforms)}"
-            )
+            raise ValueError(f"Length of p doesn't match the number of transforms: {len(p)} != {len(transforms)}")

        super().__init__()


--- a/torchvision/transforms/v2/_deprecated.py
+++ b/torchvision/transforms/v2/_deprecated.py
@@ -10,7 +10,7 @@ from torchvision.transforms.v2 import Transform


 class ToTensor(Transform):
-    """[BETA] Convert a ``PIL Image`` or ``numpy.ndarray`` to tensor.
+    """[BETA] Convert a PIL Image or ndarray to tensor and scale the values accordingly.

    .. betastatus:: ToTensor transform


--- a/torchvision/transforms/v2/_meta.py
+++ b/torchvision/transforms/v2/_meta.py
@@ -9,7 +9,7 @@ from .utils import is_simple_tensor


 class ConvertBoundingBoxFormat(Transform):
-    """[BETA] Convert bounding box coordinates to the given ``format``, e.g. from "CXCYWH" to "XYXY".
+    """[BETA] Convert bounding box coordinates to the given ``format``, e.g. from "CXCYWH" to "XYXY".

    .. betastatus:: ConvertBoundingBoxFormat transform

@@ -18,6 +18,7 @@ class ConvertBoundingBoxFormat(Transform):
            Possible values are defined by :class:`~torchvision.datapoints.BoundingBoxFormat` and
            string values match the enums, e.g. "XYXY" or "XYWH" etc.
    """
+
    _transformed_types = (datapoints.BoundingBox,)

    def __init__(self, format: Union[str, datapoints.BoundingBoxFormat]) -> None:
@@ -79,6 +80,7 @@ class ClampBoundingBox(Transform):
    .. betastatus:: ClampBoundingBox transform

    """
+
    _transformed_types = (datapoints.BoundingBox,)

    def _transform(self, inpt: datapoints.BoundingBox, params: Dict[str, Any]) -> datapoints.BoundingBox:

--- a/torchvision/transforms/v2/_misc.py
+++ b/torchvision/transforms/v2/_misc.py
@@ -223,13 +223,15 @@ class GaussianBlur(Transform):


 class ToDtype(Transform):
-    """[BETA] Converts the input to a specific dtype.
+    """[BETA] Converts the input to a specific dtype - this does not scale values.

    .. betastatus:: ToDtype transform

    Args:
-        dtype (dtype or dict of Datapoint -> dtype): The dtype to convert to. A dict can be passed to specify
-            per-datapoint conversions, e.g. ``dtype={datapoints.Image: torch.float32, datapoints.Video: torch.float64}``.
+        dtype (``torch.dtype`` or dict of ``Datapoint`` -> ``torch.dtype``): The dtype to convert to.
+            A dict can be passed to specify per-datapoint conversions, e.g.
+            ``dtype={datapoints.Image: torch.float32, datapoints.Video:
+            torch.float64}``.
    """

    _transformed_types = (torch.Tensor,)

--- a/torchvision/transforms/v2/_type_conversion.py
+++ b/torchvision/transforms/v2/_type_conversion.py
@@ -11,7 +11,7 @@ from torchvision.transforms.v2.utils import is_simple_tensor


 class PILToTensor(Transform):
-    """[BETA] Convert a ``PIL Image`` to a tensor of the same type.
+    """[BETA] Convert a PIL Image to a tensor of the same type - this does not scale values.

    .. betastatus:: PILToTensor transform

@@ -27,7 +27,8 @@ class PILToTensor(Transform):


 class ToImageTensor(Transform):
-    """[BETA] Convert a tensor or an ndarray or PIL Image to :class:`~torchvision.datapoints.Image`.
+    """[BETA] Convert a tensor, ndarray, or PIL Image to :class:`~torchvision.datapoints.Image`;
+    this does not scale values.

    .. betastatus:: ToImageTensor transform

@@ -43,7 +44,7 @@ class ToImageTensor(Transform):


 class ToImagePIL(Transform):
-    """[BETA] Convert a tensor or an ndarray to PIL Image.
+    """[BETA] Convert a tensor or an ndarray to PIL Image - this does not scale values.

    .. betastatus:: ToImagePIL transform