Commit 8a4786a2 authored by Vishwak Srinivasan, committed by Francisco Massa

Add functional transforms to docs (#499)

parent 47214f05
@@ -74,3 +74,9 @@ Generic Transforms
.. autoclass:: Lambda
Functional Transforms
---------------------
.. automodule:: torchvision.transforms.functional
:members:
@@ -88,7 +88,7 @@ def to_tensor(pic):
def to_pil_image(pic, mode=None):
"""Convert a tensor or an ndarray to PIL Image.
See :class:`~torchvision.transforms.ToPIlImage` for more details.
See :class:`~torchvision.transforms.ToPILImage` for more details.
Args:
pic (Tensor or numpy.ndarray): Image to be converted to PIL Image.
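For orientation, a minimal round-trip sketch of the two conversion helpers touched in this hunk; the file name ``example.jpg`` is only a placeholder::

    from PIL import Image
    from torchvision.transforms import functional as F

    img = Image.open('example.jpg')   # any RGB image
    tensor = F.to_tensor(img)         # PIL Image -> FloatTensor of shape (C, H, W), values in [0, 1]
    pic = F.to_pil_image(tensor)      # tensor or ndarray -> PIL Image, mode inferred when mode=None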
@@ -151,7 +151,7 @@ def to_pil_image(pic, mode=None):
def normalize(tensor, mean, std):
"""Normalize a tensor image with mean and standard deviation.
See ``Normalize`` for more details.
See :class:`~torchvision.transforms.Normalize` for more details.
Args:
tensor (Tensor): Tensor image of size (C, H, W) to be normalized.
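A hedged usage sketch for ``normalize``; the mean/std values below are the widely used ImageNet statistics and serve only as an example, and ``img`` is assumed to be a PIL Image as in the sketch above::

    from torchvision.transforms import functional as F

    tensor = F.to_tensor(img)                            # (C, H, W) tensor in [0, 1]
    tensor = F.normalize(tensor,
                         mean=[0.485, 0.456, 0.406],
                         std=[0.229, 0.224, 0.225])      # per-channel (x - mean) / std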
@@ -170,7 +170,7 @@ def normalize(tensor, mean, std):
def resize(img, size, interpolation=Image.BILINEAR):
"""Resize the input PIL Image to the given size.
r"""Resize the input PIL Image to the given size.
Args:
img (PIL Image): Image to be resized.
@@ -178,7 +178,7 @@ def resize(img, size, interpolation=Image.BILINEAR):
(h, w), the output size will be matched to this. If size is an int,
the smaller edge of the image will be matched to this number maintaining
the aspect ratio, i.e. if height > width, then the image will be rescaled to
(size * height / width, size)
:math:`\left(\text{size} \times \frac{\text{height}}{\text{width}}, \text{size}\right)`
interpolation (int, optional): Desired interpolation. Default is
``PIL.Image.BILINEAR``
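A short sketch of the two forms ``size`` can take, as described above (``img`` is again assumed to be a PIL Image)::

    from PIL import Image
    from torchvision.transforms import functional as F

    out1 = F.resize(img, 256)                           # smaller edge resized to 256, aspect ratio kept
    out2 = F.resize(img, (224, 224), Image.BILINEAR)    # exact (h, w) output size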
@@ -213,7 +213,7 @@ def scale(*args, **kwargs):
def pad(img, padding, fill=0, padding_mode='constant'):
"""Pad the given PIL Image on all sides with speficified padding mode and fill value.
r"""Pad the given PIL Image on all sides with speficified padding mode and fill value.
Args:
img (PIL Image): Image to be padded.
@@ -226,14 +226,20 @@ def pad(img, padding, fill=0, padding_mode='constant'):
length 3, it is used to fill R, G, B channels respectively.
This value is only used when the padding_mode is constant
padding_mode: Type of padding. Should be: constant, edge, reflect or symmetric. Default is constant.
constant: pads with a constant value, this value is specified with fill
edge: pads with the last value on the edge of the image
reflect: pads with reflection of image (without repeating the last value on the edge)
padding [1, 2, 3, 4] with 2 elements on both sides in reflect mode
will result in [3, 2, 1, 2, 3, 4, 3, 2]
symmetric: pads with reflection of image (repeating the last value on the edge)
padding [1, 2, 3, 4] with 2 elements on both sides in symmetric mode
will result in [2, 1, 1, 2, 3, 4, 4, 3]
- constant: pads with a constant value, this value is specified with fill
- edge: pads with the last value on the edge of the image
- reflect: pads with reflection of image (without repeating the last value on the edge)
padding [1, 2, 3, 4] with 2 elements on both sides in reflect mode
will result in [3, 2, 1, 2, 3, 4, 3, 2]
- symmetric: pads with reflection of image (repeating the last value on the edge)
padding [1, 2, 3, 4] with 2 elements on both sides in symmetric mode
will result in [2, 1, 1, 2, 3, 4, 4, 3]
Returns:
PIL Image: Padded image.
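A minimal sketch of the padding modes listed above; the pixel amounts are illustrative and ``img`` is assumed to be a PIL Image::

    from torchvision.transforms import functional as F

    out1 = F.pad(img, padding=4, fill=0)                         # constant padding, 4 px on every side
    out2 = F.pad(img, padding=(4, 8), padding_mode='reflect')    # 4 px left/right, 8 px top/bottom
    out3 = F.pad(img, padding=2, padding_mode='symmetric')       # mirrored padding that repeats the edge pixel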
@@ -312,7 +318,7 @@ def center_crop(img, output_size):
def resized_crop(img, i, j, h, w, size, interpolation=Image.BILINEAR):
"""Crop the given PIL Image and resize it to desired size.
Notably used in RandomResizedCrop.
Notably used in :class:`~torchvision.transforms.RandomResizedCrop`.
Args:
img (PIL Image): Image to be cropped.
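A hedged sketch of ``resized_crop``: crop the box whose top-left corner is at row ``i`` and column ``j``, with height ``h`` and width ``w``, then resize the crop (the numbers are placeholders)::

    from torchvision.transforms import functional as F

    patch = F.resized_crop(img, i=10, j=20, h=100, w=100, size=(224, 224))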
@@ -373,9 +379,10 @@ def five_crop(img, size):
size (sequence or int): Desired output size of the crop. If size is an
int instead of sequence like (h, w), a square crop (size, size) is
made.
Returns:
tuple: tuple (tl, tr, bl, br, center) corresponding top left,
top right, bottom left, bottom right and center crop.
tuple: tuple (tl, tr, bl, br, center)
Corresponding top left, top right, bottom left, bottom right and center crop.
"""
if isinstance(size, numbers.Number):
size = (int(size), int(size))
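A minimal sketch showing how the 5-tuple returned by ``five_crop`` is typically unpacked (``img`` is assumed to be a PIL Image larger than the crop)::

    from torchvision.transforms import functional as F

    tl, tr, bl, br, center = F.five_crop(img, 224)   # five 224x224 crops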
@@ -396,24 +403,23 @@ def five_crop(img, size):
def ten_crop(img, size, vertical_flip=False):
"""Crop the given PIL Image into four corners and the central crop plus the
flipped version of these (horizontal flipping is used by default).
r"""Crop the given PIL Image into four corners and the central crop plus the
flipped version of these (horizontal flipping is used by default).
.. Note::
This transform returns a tuple of images and there may be a
mismatch in the number of inputs and targets your ``Dataset`` returns.
Args:
size (sequence or int): Desired output size of the crop. If size is an
int instead of sequence like (h, w), a square crop (size, size) is
made.
vertical_flip (bool): Use vertical flipping instead of horizontal
Returns:
tuple: tuple (tl, tr, bl, br, center, tl_flip, tr_flip, bl_flip,
br_flip, center_flip) corresponding top left, top right,
bottom left, bottom right and center crop and same for the
flipped image.
Args:
size (sequence or int): Desired output size of the crop. If size is an
int instead of sequence like (h, w), a square crop (size, size) is
made.
vertical_flip (bool): Use vertical flipping instead of horizontal
Returns:
tuple: tuple (tl, tr, bl, br, center, tl_flip, tr_flip, bl_flip, br_flip, center_flip)
Corresponding top left, top right, bottom left, bottom right and center crop
and same for the flipped image.
"""
if isinstance(size, numbers.Number):
size = (int(size), int(size))
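A short sketch of ``ten_crop``; as the note above says, the result is a tuple of ten images::

    from torchvision.transforms import functional as F

    crops = F.ten_crop(img, 224)                         # five crops plus their horizontally flipped versions
    crops_v = F.ten_crop(img, 224, vertical_flip=True)   # flip the second five vertically instead
    assert len(crops) == 10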
@@ -501,7 +507,9 @@ def adjust_hue(img, hue_factor):
`hue_factor` is the amount of shift in H channel and must be in the
interval `[-0.5, 0.5]`.
See https://en.wikipedia.org/wiki/Hue for more details on Hue.
See `Hue`_ for more details.
.. _Hue: https://en.wikipedia.org/wiki/Hue
Args:
img (PIL Image): PIL Image to be adjusted.
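A hedged sketch of ``adjust_hue``; the factor values are arbitrary examples inside the allowed ``[-0.5, 0.5]`` interval::

    from torchvision.transforms import functional as F

    shifted_down = F.adjust_hue(img, -0.1)   # shift the H channel down
    shifted_up = F.adjust_hue(img, 0.1)      # shift the H channel up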
@@ -537,20 +545,23 @@ def adjust_hue(img, hue_factor):
def adjust_gamma(img, gamma, gain=1):
"""Perform gamma correction on an image.
r"""Perform gamma correction on an image.
Also known as Power Law Transform. Intensities in RGB mode are adjusted
based on the following equation:
I_out = 255 * gain * ((I_in / 255) ** gamma)
.. math::
I_{\text{out}} = 255 \times \text{gain} \times \left(\frac{I_{\text{in}}}{255}\right)^{\gamma}
See `Gamma Correction`_ for more details.
See https://en.wikipedia.org/wiki/Gamma_correction for more details.
.. _Gamma Correction: https://en.wikipedia.org/wiki/Gamma_correction
Args:
img (PIL Image): PIL Image to be adjusted.
gamma (float): Non negative real number. gamma larger than 1 make the
shadows darker, while gamma smaller than 1 make dark regions
lighter.
gamma (float): Non negative real number, same as :math:`\gamma` in the equation.
gamma larger than 1 makes the shadows darker,
while gamma smaller than 1 makes dark regions lighter.
gain (float): The constant multiplier.
"""
if not _is_pil_image(img):
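A minimal sketch of ``adjust_gamma`` following the equation above (the gamma values are illustrative only)::

    from torchvision.transforms import functional as F

    darker = F.adjust_gamma(img, gamma=2.0)            # gamma > 1 darkens shadows
    lighter = F.adjust_gamma(img, gamma=0.5, gain=1)   # gamma < 1 lightens dark regions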
@@ -575,11 +586,10 @@ def rotate(img, angle, resample=False, expand=False, center=None):
Args:
img (PIL Image): PIL Image to be rotated.
angle ({float, int}): In degrees, counter clockwise.
resample ({PIL.Image.NEAREST, PIL.Image.BILINEAR, PIL.Image.BICUBIC}, optional):
An optional resampling filter.
See http://pillow.readthedocs.io/en/3.4.x/handbook/concepts.html#filters
If omitted, or if the image has mode "1" or "P", it is set to PIL.Image.NEAREST.
angle (float or int): In degrees, counter clockwise.
resample (``PIL.Image.NEAREST`` or ``PIL.Image.BILINEAR`` or ``PIL.Image.BICUBIC``, optional):
An optional resampling filter. See `filters`_ for more information.
If omitted, or if the image has mode "1" or "P", it is set to ``PIL.Image.NEAREST``.
expand (bool, optional): Optional expansion flag.
If true, expands the output image to make it large enough to hold the entire rotated image.
If false or omitted, make the output image the same size as the input image.
@@ -587,6 +597,9 @@ def rotate(img, angle, resample=False, expand=False, center=None):
center (2-tuple, optional): Optional center of rotation.
Origin is the upper left corner.
Default is the center of the image.
.. _filters: http://pillow.readthedocs.io/en/3.4.x/handbook/concepts.html#filters
"""
if not _is_pil_image(img):
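A short sketch of ``rotate``; the angle and options are chosen only for illustration::

    from PIL import Image
    from torchvision.transforms import functional as F

    out = F.rotate(img, 45, resample=Image.BILINEAR, expand=True)   # 45 degrees counter clockwise, canvas enlarged to fit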
@@ -635,14 +648,14 @@ def affine(img, angle, translate, scale, shear, resample=0, fillcolor=None):
Args:
img (PIL Image): PIL Image to be rotated.
angle ({float, int}): rotation angle in degrees between -180 and 180, clockwise direction.
angle (float or int): rotation angle in degrees between -180 and 180, clockwise direction.
translate (list or tuple of integers): horizontal and vertical translations (post-rotation translation)
scale (float): overall scale
shear (float): shear angle value in degrees between -180 to 180, clockwise direction.
resample ({PIL.Image.NEAREST, PIL.Image.BILINEAR, PIL.Image.BICUBIC}, optional):
resample (``PIL.Image.NEAREST`` or ``PIL.Image.BILINEAR`` or ``PIL.Image.BICUBIC``, optional):
An optional resampling filter.
See http://pillow.readthedocs.io/en/3.4.x/handbook/concepts.html#filters
If omitted, or if the image has mode "1" or "P", it is set to PIL.Image.NEAREST.
See `filters`_ for more information.
If omitted, or if the image has mode "1" or "P", it is set to ``PIL.Image.NEAREST``.
fillcolor (int): Optional fill color for the area outside the transform in the output image. (Pillow>=5.0.0)
"""
if not _is_pil_image(img):
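A hedged sketch of ``affine``; the parameter values are placeholders, and ``fillcolor`` requires Pillow >= 5.0.0 as noted above::

    from torchvision.transforms import functional as F

    out = F.affine(img, angle=15, translate=(10, 0), scale=1.2, shear=5)   # rotate, shift right, zoom, shear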
@@ -667,9 +680,10 @@ def to_grayscale(img, num_output_channels=1):
img (PIL Image): Image to be converted to grayscale.
Returns:
PIL Image: Grayscale version of the image.
if num_output_channels == 1 : returned image is single channel
if num_output_channels == 3 : returned image is 3 channel with r == g == b
PIL Image: Grayscale version of the image.
if num_output_channels = 1 : returned image is single channel
if num_output_channels = 3 : returned image is 3 channel with r = g = b
"""
if not _is_pil_image(img):
raise TypeError('img should be PIL Image. Got {}'.format(type(img)))
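A minimal sketch of the two ``num_output_channels`` cases described above::

    from torchvision.transforms import functional as F

    gray1 = F.to_grayscale(img, num_output_channels=1)   # single-channel grayscale image
    gray3 = F.to_grayscale(img, num_output_channels=3)   # three identical channels (r = g = b)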
......