Unverified commit 7b9d30eb authored by Zhengyang Feng, committed by GitHub

Transforms documentation clean-up (#3200)

* Initial doc clean-up

* Remove all private docs

* Rename files

* Highlight backend inconsistencies

* Sequence and number

* [Need checking] AutoAugment related doc change

* Revert name changes
parent 8c0bb6c2
@@ -127,8 +127,9 @@ def _get_magnitudes():
 class AutoAugment(torch.nn.Module):
     r"""AutoAugment data augmentation method based on
     `"AutoAugment: Learning Augmentation Strategies from Data" <https://arxiv.org/pdf/1805.09501.pdf>`_.
-    The image can be a PIL Image or a Tensor, in which case it is expected
-    to have [..., H, W] shape, where ... means an arbitrary number of leading dimensions.
+    If the image is torch Tensor, it should be of type torch.uint8, and it is expected
+    to have [..., 1 or 3, H, W] shape, where ... means an arbitrary number of leading dimensions.
+    If img is PIL Image, it is expected to be in mode "L" or "RGB".
     Args:
         policy (AutoAugmentPolicy): Desired policy enum defined by
@@ -136,9 +137,8 @@ class AutoAugment(torch.nn.Module):
         interpolation (InterpolationMode): Desired interpolation enum defined by
             :class:`torchvision.transforms.InterpolationMode`. Default is ``InterpolationMode.NEAREST``.
             If input is Tensor, only ``InterpolationMode.NEAREST``, ``InterpolationMode.BILINEAR`` are supported.
-        fill (sequence or int or float, optional): Pixel fill value for the area outside the transformed
-            image. If int or float, the value is used for all bands respectively.
-            This option is supported for PIL image and Tensor inputs.
+        fill (sequence or number, optional): Pixel fill value for the area outside the transformed
+            image. If given a number, the value is used for all bands respectively.
             If input is PIL Image, the option is only available for ``Pillow>=5.0.0``.
     Example:
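The docstring's example body is collapsed in this view. As a reading aid, a minimal usage sketch consistent with the revised docstring; the shape and policy choice are illustrative:

```python
import torch
from torchvision.transforms import AutoAugment, AutoAugmentPolicy

# uint8 tensor in [..., 1 or 3, H, W] layout, per the revised docstring
img = torch.randint(0, 256, (3, 224, 224), dtype=torch.uint8)
aug = AutoAugment(policy=AutoAugmentPolicy.IMAGENET)
out = aug(img)  # same shape and dtype as the input
```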
...
@@ -59,7 +59,7 @@ _parse_fill = F_pil._parse_fill
 def _get_image_size(img: Tensor) -> List[int]:
-    """Returns image sizea as (w, h)
+    """Returns image size as [w, h]
     """
     if isinstance(img, torch.Tensor):
         return F_t._get_image_size(img)
@@ -68,6 +68,8 @@ def _get_image_size(img: Tensor) -> List[int]:
 def _get_image_num_channels(img: Tensor) -> int:
+    """Returns number of image channels
+    """
     if isinstance(img, torch.Tensor):
         return F_t._get_image_num_channels(img)
@@ -86,8 +88,9 @@ def _is_numpy_image(img: Any) -> bool:
 def to_tensor(pic):
     """Convert a ``PIL Image`` or ``numpy.ndarray`` to tensor.
+    This function does not support torchscript.
-    See ``ToTensor`` for more details.
+    See :class:`~torchvision.transforms.ToTensor` for more details.
     Args:
         pic (PIL Image or numpy.ndarray): Image to be converted to tensor.
@@ -141,6 +144,7 @@ def to_tensor(pic):
 def pil_to_tensor(pic):
     """Convert a ``PIL Image`` to a tensor of the same type.
+    This function does not support torchscript.
     See :class:`~torchvision.transforms.PILToTensor` for more details.
@@ -169,6 +173,7 @@ def pil_to_tensor(pic):
 def convert_image_dtype(image: torch.Tensor, dtype: torch.dtype = torch.float) -> torch.Tensor:
     """Convert a tensor image to the given ``dtype`` and scale the values accordingly
+    This function does not support PIL Image.
     Args:
         image (torch.Tensor): Image to be converted
@@ -195,7 +200,7 @@ def convert_image_dtype(image: torch.Tensor, dtype: torch.dtype = torch.float) -> torch.Tensor:
 def to_pil_image(pic, mode=None):
-    """Convert a tensor or an ndarray to PIL Image.
+    """Convert a tensor or an ndarray to PIL Image. This function does not support torchscript.
     See :class:`~torchvision.transforms.ToPILImage` for more details.
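Aside: the four conversion helpers touched above differ mainly in dtype handling and torchscript support. A sketch, with illustrative sizes:

```python
import torch
from PIL import Image
import torchvision.transforms.functional as F

pil_img = Image.new("RGB", (64, 48))
t = F.to_tensor(pil_img)                        # float32 in [0, 1], shape [3, 48, 64]
u8 = F.pil_to_tensor(pil_img)                   # uint8, values copied unscaled
f32 = F.convert_image_dtype(u8, torch.float32)  # rescales 0..255 to 0.0..1.0
back = F.to_pil_image(t)                        # tensor/ndarray back to a PIL Image
```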
@@ -291,6 +296,7 @@ def to_pil_image(pic, mode=None):
 def normalize(tensor: Tensor, mean: List[float], std: List[float], inplace: bool = False) -> Tensor:
     """Normalize a tensor image with mean and standard deviation.
+    This transform does not support PIL Image.
     .. note::
         This transform acts out of place by default, i.e., it does not mutate the input tensor.
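Aside: a minimal usage sketch of `normalize` as documented above; the ImageNet statistics are only an illustrative choice:

```python
import torch
import torchvision.transforms.functional as F

img = torch.rand(3, 224, 224)  # tensor only; PIL input is rejected here
out = F.normalize(img, mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
```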
@@ -331,7 +337,7 @@ def normalize(tensor: Tensor, mean: List[float], std: List[float], inplace: bool = False) -> Tensor:
 def resize(img: Tensor, size: List[int], interpolation: InterpolationMode = InterpolationMode.BILINEAR) -> Tensor:
     r"""Resize the input image to the given size.
-    The image can be a PIL Image or a torch Tensor, in which case it is expected
+    If the image is torch Tensor, it is expected
     to have [..., H, W] shape, where ... means an arbitrary number of leading dimensions
     Args:
@@ -341,8 +347,7 @@ def resize(img: Tensor, size: List[int], interpolation: InterpolationMode = InterpolationMode.BILINEAR) -> Tensor:
             the smaller edge of the image will be matched to this number maintaining
             the aspect ratio. i.e., if height > width, then image will be rescaled to
             :math:`\left(\text{size} \times \frac{\text{height}}{\text{width}}, \text{size}\right)`.
-            In torchscript mode size as single int is not supported, use a tuple or
-            list of length 1: ``[size, ]``.
+            In torchscript mode size as single int is not supported, use a sequence of length 1: ``[size, ]``.
         interpolation (InterpolationMode): Desired interpolation enum defined by
             :class:`torchvision.transforms.InterpolationMode`.
             Default is ``InterpolationMode.BILINEAR``. If input is Tensor, only ``InterpolationMode.NEAREST``,
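Aside: a sketch of the two `size` forms described above (shapes illustrative):

```python
import torch
import torchvision.transforms.functional as F

img = torch.rand(3, 240, 320)
out = F.resize(img, [120, 160])  # explicit (h, w)
out2 = F.resize(img, [120])      # length-1 sequence: scales the smaller edge
```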
@@ -378,22 +383,22 @@ def scale(*args, **kwargs):
 def pad(img: Tensor, padding: List[int], fill: int = 0, padding_mode: str = "constant") -> Tensor:
     r"""Pad the given image on all sides with the given "pad" value.
-    The image can be a PIL Image or a torch Tensor, in which case it is expected
+    If the image is torch Tensor, it is expected
     to have [..., H, W] shape, where ... means an arbitrary number of leading dimensions
     Args:
         img (PIL Image or Tensor): Image to be padded.
-        padding (int or tuple or list): Padding on each border. If a single int is provided this
-            is used to pad all borders. If tuple of length 2 is provided this is the padding
-            on left/right and top/bottom respectively. If a tuple of length 4 is provided
+        padding (int or sequence): Padding on each border. If a single int is provided this
+            is used to pad all borders. If sequence of length 2 is provided this is the padding
+            on left/right and top/bottom respectively. If a sequence of length 4 is provided
             this is the padding for the left, top, right and bottom borders respectively.
-            In torchscript mode padding as single int is not supported, use a tuple or
-            list of length 1: ``[padding, ]``.
-        fill (int or str or tuple): Pixel fill value for constant fill. Default is 0. If a tuple of
-            length 3, it is used to fill R, G, B channels respectively.
-            This value is only used when the padding_mode is constant. Only int value is supported for Tensors.
+            In torchscript mode padding as single int is not supported, use a sequence of length 1: ``[padding, ]``.
+        fill (number or str or tuple): Pixel fill value for constant fill. Default is 0.
+            If a tuple of length 3, it is used to fill R, G, B channels respectively.
+            This value is only used when the padding_mode is constant.
+            Only number is supported for torch Tensor.
+            Only int or str or tuple value is supported for PIL Image.
         padding_mode: Type of padding. Should be: constant, edge, reflect or symmetric. Default is constant.
+            Mode symmetric is not yet supported for Tensor inputs.
         - constant: pads with a constant value, this value is specified with fill
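Aside: a sketch of the padding forms described above; the length-1 list form follows the torchscript note, and the shapes are illustrative:

```python
import torch
import torchvision.transforms.functional as F

img = torch.rand(3, 32, 32)
p1 = F.pad(img, [4], fill=0)  # 4 px on all four sides -> [3, 40, 40]
p2 = F.pad(img, [2, 8])       # left/right = 2, top/bottom = 8 -> [3, 48, 36]
```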
@@ -420,9 +425,8 @@ def pad(img: Tensor, padding: List[int], fill: int = 0, padding_mode: str = "constant") -> Tensor:
 def crop(img: Tensor, top: int, left: int, height: int, width: int) -> Tensor:
     """Crop the given image at specified location and output size.
-    The image can be a PIL Image or a Tensor, in which case it is expected
-    to have [..., H, W] shape, where ... means an arbitrary number of leading
-    dimensions
+    If the image is torch Tensor, it is expected
+    to have [..., H, W] shape, where ... means an arbitrary number of leading dimensions
     Args:
         img (PIL Image or Tensor): Image to be cropped. (0,0) denotes the top left corner of the image.
@@ -443,12 +447,12 @@ def crop(img: Tensor, top: int, left: int, height: int, width: int) -> Tensor:
 def center_crop(img: Tensor, output_size: List[int]) -> Tensor:
     """Crops the given image at the center.
-    The image can be a PIL Image or a Tensor, in which case it is expected
+    If the image is torch Tensor, it is expected
     to have [..., H, W] shape, where ... means an arbitrary number of leading dimensions
     Args:
         img (PIL Image or Tensor): Image to be cropped.
-        output_size (sequence or int): (height, width) of the crop box. If int or sequence with single int
+        output_size (sequence or int): (height, width) of the crop box. If int or sequence with single int,
             it is used for both directions.
     Returns:
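Aside: a sketch of the two crop helpers above (sizes illustrative):

```python
import torch
import torchvision.transforms.functional as F

img = torch.rand(3, 128, 128)
tl = F.crop(img, top=0, left=0, height=100, width=100)  # (0, 0) is the top-left corner
mid = F.center_crop(img, [100])                         # square 100x100 crop at the center
```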
@@ -472,7 +476,7 @@ def resized_crop(
     interpolation: InterpolationMode = InterpolationMode.BILINEAR
 ) -> Tensor:
     """Crop the given image and resize it to desired size.
-    The image can be a PIL Image or a Tensor, in which case it is expected
+    If the image is torch Tensor, it is expected
     to have [..., H, W] shape, where ... means an arbitrary number of leading dimensions
     Notably used in :class:`~torchvision.transforms.RandomResizedCrop`.
@@ -499,12 +503,12 @@ def resized_crop(
 def hflip(img: Tensor) -> Tensor:
-    """Horizontally flip the given PIL Image or Tensor.
+    """Horizontally flip the given image.
     Args:
         img (PIL Image or Tensor): Image to be flipped. If img
             is a Tensor, it is expected to be in [..., H, W] format,
-            where ... means it can have an arbitrary number of trailing
+            where ... means it can have an arbitrary number of leading
             dimensions.
     Returns:
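Aside: `hflip` (and the analogous `vflip` further down) is an involution, which makes a tidy sanity check:

```python
import torch
import torchvision.transforms.functional as F

img = torch.rand(3, 4, 4)
assert torch.equal(F.hflip(F.hflip(img)), img)  # flipping twice is a no-op
```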
@@ -554,7 +558,7 @@ def perspective(
     fill: Optional[List[float]] = None
 ) -> Tensor:
     """Perform perspective transform of the given image.
-    The image can be a PIL Image or a Tensor, in which case it is expected
+    If the image is torch Tensor, it is expected
     to have [..., H, W] shape, where ... means an arbitrary number of leading dimensions.
     Args:
@@ -567,11 +571,10 @@ def perspective(
             :class:`torchvision.transforms.InterpolationMode`. Default is ``InterpolationMode.BILINEAR``.
             If input is Tensor, only ``InterpolationMode.NEAREST``, ``InterpolationMode.BILINEAR`` are supported.
             For backward compatibility integer values (e.g. ``PIL.Image.NEAREST``) are still acceptable.
-        fill (sequence or int or float, optional): Pixel fill value for the area outside the transformed
-            image. If int or float, the value is used for all bands respectively.
-            This option is supported for PIL image and Tensor inputs.
-            In torchscript mode single int/float value is not supported, please use a tuple
-            or list of length 1: ``[value, ]``.
+        fill (sequence or number, optional): Pixel fill value for the area outside the transformed
+            image. If given a number, the value is used for all bands respectively.
+            In torchscript mode single int/float value is not supported, please use a sequence
+            of length 1: ``[value, ]``.
             If input is PIL Image, the option is only available for ``Pillow>=5.0.0``.
     Returns:
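Aside: a sketch of `perspective`; the corner points are arbitrary illustrative values, and the length-1 fill follows the torchscript note above:

```python
import torch
import torchvision.transforms.functional as F

img = torch.rand(3, 48, 64)
h, w = 48, 64
start = [[0, 0], [w - 1, 0], [w - 1, h - 1], [0, h - 1]]  # source corners
end = [[4, 2], [w - 5, 0], [w - 1, h - 1], [0, h - 3]]    # warped corners
out = F.perspective(img, start, end, fill=[0.0])
```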
@@ -599,16 +602,16 @@ def perspective(
 def vflip(img: Tensor) -> Tensor:
-    """Vertically flip the given PIL Image or torch Tensor.
+    """Vertically flip the given image.
     Args:
         img (PIL Image or Tensor): Image to be flipped. If img
             is a Tensor, it is expected to be in [..., H, W] format,
-            where ... means it can have an arbitrary number of trailing
+            where ... means it can have an arbitrary number of leading
             dimensions.
     Returns:
-        PIL Image: Vertically flipped image.
+        PIL Image or Tensor: Vertically flipped image.
     """
     if not isinstance(img, torch.Tensor):
         return F_pil.vflip(img)
@@ -618,7 +621,7 @@ def vflip(img: Tensor) -> Tensor:
 def five_crop(img: Tensor, size: List[int]) -> Tuple[Tensor, Tensor, Tensor, Tensor, Tensor]:
     """Crop the given image into four corners and the central crop.
-    The image can be a PIL Image or a Tensor, in which case it is expected
+    If the image is torch Tensor, it is expected
     to have [..., H, W] shape, where ... means an arbitrary number of leading dimensions
     .. Note::
@@ -629,7 +632,7 @@ def five_crop(img: Tensor, size: List[int]) -> Tuple[Tensor, Tensor, Tensor, Tensor, Tensor]:
         img (PIL Image or Tensor): Image to be cropped.
         size (sequence or int): Desired output size of the crop. If size is an
             int instead of sequence like (h, w), a square crop (size, size) is
-            made. If provided a tuple or list of length 1, it will be interpreted as (size[0], size[0]).
+            made. If provided a sequence of length 1, it will be interpreted as (size[0], size[0]).
     Returns:
         tuple: tuple (tl, tr, bl, br, center)
@@ -663,7 +666,7 @@ def ten_crop(img: Tensor, size: List[int], vertical_flip: bool = False) -> List[Tensor]:
     """Generate ten cropped images from the given image.
     Crop the given image into four corners and the central crop plus the
     flipped version of these (horizontal flipping is used by default).
-    The image can be a PIL Image or a Tensor, in which case it is expected
+    If the image is torch Tensor, it is expected
     to have [..., H, W] shape, where ... means an arbitrary number of leading dimensions
     .. Note::
@@ -674,7 +677,7 @@ def ten_crop(img: Tensor, size: List[int], vertical_flip: bool = False) -> List[Tensor]:
         img (PIL Image or Tensor): Image to be cropped.
         size (sequence or int): Desired output size of the crop. If size is an
             int instead of sequence like (h, w), a square crop (size, size) is
-            made. If provided a tuple or list of length 1, it will be interpreted as (size[0], size[0]).
+            made. If provided a sequence of length 1, it will be interpreted as (size[0], size[0]).
         vertical_flip (bool): Use vertical flipping instead of horizontal
     Returns:
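Aside: a sketch of the multi-crop helpers above (crop size illustrative):

```python
import torch
import torchvision.transforms.functional as F

img = torch.rand(3, 64, 64)
tl, tr, bl, br, center = F.five_crop(img, [32])
crops = F.ten_crop(img, [32])  # the five crops plus their horizontal flips
assert len(crops) == 10
```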
@@ -702,10 +705,12 @@ def ten_crop(img: Tensor, size: List[int], vertical_flip: bool = False) -> List[Tensor]:
 def adjust_brightness(img: Tensor, brightness_factor: float) -> Tensor:
-    """Adjust brightness of an Image.
+    """Adjust brightness of an image.
     Args:
         img (PIL Image or Tensor): Image to be adjusted.
+            If img is a Tensor, it is expected to be in [..., 1 or 3, H, W] format,
+            where ... means it can have an arbitrary number of leading dimensions.
         brightness_factor (float): How much to adjust the brightness. Can be
             any non negative number. 0 gives a black image, 1 gives the
             original image while 2 increases the brightness by a factor of 2.
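Aside: a one-line sketch of the factor semantics described above (`adjust_contrast`, in the next hunk, follows the same pattern):

```python
import torch
import torchvision.transforms.functional as F

img = torch.rand(3, 64, 64)
brighter = F.adjust_brightness(img, 1.5)  # factor 1.0 returns the original image
```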
@@ -720,7 +725,7 @@ def adjust_brightness(img: Tensor, brightness_factor: float) -> Tensor:
 def adjust_contrast(img: Tensor, contrast_factor: float) -> Tensor:
-    """Adjust contrast of an Image.
+    """Adjust contrast of an image.
     Args:
         img (PIL Image or Tensor): Image to be adjusted.
@@ -801,6 +806,8 @@ def adjust_gamma(img: Tensor, gamma: float, gain: float = 1) -> Tensor:
     Args:
         img (PIL Image or Tensor): PIL Image to be adjusted.
+            If img is a Tensor, it is expected to be in [..., 1 or 3, H, W] format,
+            where ... means it can have an arbitrary number of leading dimensions.
         gamma (float): Non negative real number, same as :math:`\gamma` in the equation.
             gamma larger than 1 makes the shadows darker,
             while gamma smaller than 1 makes dark regions lighter.
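Aside: a sketch of the gamma behavior just described (values illustrative):

```python
import torch
import torchvision.transforms.functional as F

img = torch.rand(3, 64, 64)
lifted = F.adjust_gamma(img, gamma=0.8, gain=1.0)  # gamma < 1 lightens dark regions
```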
@@ -870,12 +877,12 @@ def rotate(
     fill: Optional[List[float]] = None, resample: Optional[int] = None
 ) -> Tensor:
     """Rotate the image by angle.
-    The image can be a PIL Image or a Tensor, in which case it is expected
+    If the image is torch Tensor, it is expected
     to have [..., H, W] shape, where ... means an arbitrary number of leading dimensions.
     Args:
         img (PIL Image or Tensor): image to be rotated.
-        angle (float or int): rotation angle value in degrees, counter-clockwise.
+        angle (number): rotation angle value in degrees, counter-clockwise.
         interpolation (InterpolationMode): Desired interpolation enum defined by
             :class:`torchvision.transforms.InterpolationMode`. Default is ``InterpolationMode.NEAREST``.
             If input is Tensor, only ``InterpolationMode.NEAREST``, ``InterpolationMode.BILINEAR`` are supported.
@@ -884,13 +891,12 @@ def rotate(
             If true, expands the output image to make it large enough to hold the entire rotated image.
             If false or omitted, make the output image the same size as the input image.
             Note that the expand flag assumes rotation around the center and no translation.
-        center (list or tuple, optional): Optional center of rotation. Origin is the upper left corner.
+        center (sequence, optional): Optional center of rotation. Origin is the upper left corner.
             Default is the center of the image.
-        fill (sequence or int or float, optional): Pixel fill value for the area outside the transformed
-            image. If int or float, the value is used for all bands respectively.
-            This option is supported for PIL image and Tensor inputs.
-            In torchscript mode single int/float value is not supported, please use a tuple
-            or list of length 1: ``[value, ]``.
+        fill (sequence or number, optional): Pixel fill value for the area outside the transformed
+            image. If given a number, the value is used for all bands respectively.
+            In torchscript mode single int/float value is not supported, please use a sequence
+            of length 1: ``[value, ]``.
             If input is PIL Image, the option is only available for ``Pillow>=5.2.0``.
     Returns:
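Aside: a sketch of `rotate` with the expand flag and a length-1 fill per the torchscript note (angle is illustrative):

```python
import torch
import torchvision.transforms.functional as F

img = torch.rand(3, 64, 64)
out = F.rotate(img, angle=15.0, expand=True, fill=[0.0])  # output grows to fit the rotation
```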
@@ -944,26 +950,25 @@ def affine(
     resample: Optional[int] = None, fillcolor: Optional[List[float]] = None
 ) -> Tensor:
     """Apply affine transformation on the image keeping image center invariant.
-    The image can be a PIL Image or a Tensor, in which case it is expected
+    If the image is torch Tensor, it is expected
     to have [..., H, W] shape, where ... means an arbitrary number of leading dimensions.
     Args:
         img (PIL Image or Tensor): image to transform.
-        angle (float or int): rotation angle in degrees between -180 and 180, clockwise direction.
-        translate (list or tuple of integers): horizontal and vertical translations (post-rotation translation)
+        angle (number): rotation angle in degrees between -180 and 180, clockwise direction.
+        translate (sequence of integers): horizontal and vertical translations (post-rotation translation)
         scale (float): overall scale
-        shear (float or tuple or list): shear angle value in degrees between -180 to 180, clockwise direction.
-            If a tuple of list is specified, the first value corresponds to a shear parallel to the x axis, while
+        shear (float or sequence): shear angle value in degrees between -180 to 180, clockwise direction.
+            If a sequence is specified, the first value corresponds to a shear parallel to the x axis, while
             the second value corresponds to a shear parallel to the y axis.
         interpolation (InterpolationMode): Desired interpolation enum defined by
             :class:`torchvision.transforms.InterpolationMode`. Default is ``InterpolationMode.NEAREST``.
             If input is Tensor, only ``InterpolationMode.NEAREST``, ``InterpolationMode.BILINEAR`` are supported.
             For backward compatibility integer values (e.g. ``PIL.Image.NEAREST``) are still acceptable.
-        fill (sequence or int or float, optional): Pixel fill value for the area outside the transformed
-            image. If int or float, the value is used for all bands respectively.
-            This option is supported for PIL image and Tensor inputs.
-            In torchscript mode single int/float value is not supported, please use a tuple
-            or list of length 1: ``[value, ]``.
+        fill (sequence or number, optional): Pixel fill value for the area outside the transformed
+            image. If given a number, the value is used for all bands respectively.
+            In torchscript mode single int/float value is not supported, please use a sequence
+            of length 1: ``[value, ]``.
             If input is PIL Image, the option is only available for ``Pillow>=5.0.0``.
         fillcolor (sequence, int, float): deprecated argument and will be removed since v0.10.0.
             Please use `arg`:fill: instead.
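Aside: a sketch of `affine` exercising the parameters documented above (all values illustrative):

```python
import torch
import torchvision.transforms.functional as F

img = torch.rand(3, 64, 64)
out = F.affine(img, angle=10.0, translate=[5, -5], scale=0.9, shear=[2.0, 0.0])
```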
@@ -1047,10 +1052,11 @@ def affine(
 @torch.jit.unused
 def to_grayscale(img, num_output_channels=1):
     """Convert PIL image of any mode (RGB, HSV, LAB, etc) to grayscale version of image.
+    This transform does not support torch Tensor.
     Args:
         img (PIL Image): PIL Image to be converted to grayscale.
-        num_output_channels (int): number of channels of the output image. Value can be 1 or 3. Default, 1.
+        num_output_channels (int): number of channels of the output image. Value can be 1 or 3. Default is 1.
     Returns:
         PIL Image: Grayscale version of the image.
@@ -1066,8 +1072,8 @@ def to_grayscale(img, num_output_channels=1):
 def rgb_to_grayscale(img: Tensor, num_output_channels: int = 1) -> Tensor:
     """Convert RGB image to grayscale version of image.
-    The image can be a PIL Image or a Tensor, in which case it is expected
-    to have [..., H, W] shape, where ... means an arbitrary number of leading dimensions
+    If the image is torch Tensor, it is expected
+    to have [..., 3, H, W] shape, where ... means an arbitrary number of leading dimensions
     Note:
         Please, note that this method supports only RGB images as input. For inputs in other color spaces,
def erase(img: Tensor, i: int, j: int, h: int, w: int, v: Tensor, inplace: bool = False) -> Tensor: def erase(img: Tensor, i: int, j: int, h: int, w: int, v: Tensor, inplace: bool = False) -> Tensor:
""" Erase the input Tensor Image with given value. """ Erase the input Tensor Image with given value.
This transform does not support PIL Image.
Args: Args:
img (Tensor Image): Tensor image of size (C, H, W) to be erased img (Tensor Image): Tensor image of size (C, H, W) to be erased
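Aside: the remaining arguments of `erase` are collapsed in this view; a sketch of the tensor-only call (region coordinates and patch are illustrative):

```python
import torch
import torchvision.transforms.functional as F

img = torch.rand(3, 64, 64)
patch = torch.zeros(3, 16, 16)                     # replacement values
out = F.erase(img, i=8, j=8, h=16, w=16, v=patch)  # blanks a 16x16 region at (8, 8)
```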
@@ -1115,22 +1122,21 @@ def erase(img: Tensor, i: int, j: int, h: int, w: int, v: Tensor, inplace: bool = False) -> Tensor:
 def gaussian_blur(img: Tensor, kernel_size: List[int], sigma: Optional[List[float]] = None) -> Tensor:
-    """Performs Gaussian blurring on the img by given kernel.
-    The image can be a PIL Image or a Tensor, in which case it is expected
-    to have [..., H, W] shape, where ... means an arbitrary number of leading dimensions
+    """Performs Gaussian blurring on the image by given kernel.
+    If the image is torch Tensor, it is expected
+    to have [..., H, W] shape, where ... means an arbitrary number of leading dimensions.
     Args:
         img (PIL Image or Tensor): Image to be blurred
         kernel_size (sequence of ints or int): Gaussian kernel size. Can be a sequence of integers
             like ``(kx, ky)`` or a single integer for square kernels.
-            In torchscript mode kernel_size as single int is not supported, use a tuple or
-            list of length 1: ``[ksize, ]``.
+            In torchscript mode kernel_size as single int is not supported, use a sequence of length 1: ``[ksize, ]``.
         sigma (sequence of floats or float, optional): Gaussian kernel standard deviation. Can be a
             sequence of floats like ``(sigma_x, sigma_y)`` or a single float to define the
             same sigma in both X/Y directions. If None, then it is computed using
             ``kernel_size`` as ``sigma = 0.3 * ((kernel_size - 1) * 0.5 - 1) + 0.8``.
             Default, None. In torchscript mode sigma as single float is
-            not supported, use a tuple or list of length 1: ``[sigma, ]``.
+            not supported, use a sequence of length 1: ``[sigma, ]``.
     Returns:
         PIL Image or Tensor: Gaussian Blurred version of the image.
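Aside: a sketch of the two sigma modes described above (kernel sizes illustrative):

```python
import torch
import torchvision.transforms.functional as F

img = torch.rand(3, 64, 64)
soft = F.gaussian_blur(img, kernel_size=[5, 5], sigma=[1.0, 1.0])
auto = F.gaussian_blur(img, kernel_size=[5])  # sigma derived from kernel_size
```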
@@ -1175,13 +1181,13 @@ def gaussian_blur(img: Tensor, kernel_size: List[int], sigma: Optional[List[float]] = None) -> Tensor:
 def invert(img: Tensor) -> Tensor:
-    """Invert the colors of an RGB/grayscale PIL Image or torch Tensor.
+    """Invert the colors of an RGB/grayscale image.
     Args:
         img (PIL Image or Tensor): Image to have its colors inverted.
-            If img is a Tensor, it is expected to be in [..., H, W] format,
-            where ... means it can have an arbitrary number of trailing
-            dimensions.
+            If img is a Tensor, it is expected to be in [..., 1 or 3, H, W] format,
+            where ... means it can have an arbitrary number of leading dimensions.
+            If img is PIL Image, it is expected to be in mode "L" or "RGB".
     Returns:
         PIL Image or Tensor: Color inverted image.
@@ -1193,13 +1199,14 @@ def invert(img: Tensor) -> Tensor:
 def posterize(img: Tensor, bits: int) -> Tensor:
-    """Posterize a PIL Image or torch Tensor by reducing the number of bits for each color channel.
+    """Posterize an image by reducing the number of bits for each color channel.
     Args:
         img (PIL Image or Tensor): Image to have its colors posterized.
             If img is a Tensor, it should be of type torch.uint8 and
-            it is expected to be in [..., H, W] format, where ... means
-            it can have an arbitrary number of trailing dimensions.
+            it is expected to be in [..., 1 or 3, H, W] format, where ... means
+            it can have an arbitrary number of leading dimensions.
+            If img is PIL Image, it is expected to be in mode "L" or "RGB".
         bits (int): The number of bits to keep for each channel (0-8).
     Returns:
         PIL Image or Tensor: Posterized image.
...@@ -1214,13 +1221,13 @@ def posterize(img: Tensor, bits: int) -> Tensor: ...@@ -1214,13 +1221,13 @@ def posterize(img: Tensor, bits: int) -> Tensor:
def solarize(img: Tensor, threshold: float) -> Tensor: def solarize(img: Tensor, threshold: float) -> Tensor:
"""Solarize a PIL Image or torch Tensor by inverting all pixel values above a threshold. """Solarize an RGB/grayscale image by inverting all pixel values above a threshold.
Args: Args:
img (PIL Image or Tensor): Image to have its colors inverted. img (PIL Image or Tensor): Image to have its colors inverted.
If img is a Tensor, it is expected to be in [..., H, W] format, If img is a Tensor, it is expected to be in [..., 1 or 3, H, W] format,
where ... means it can have an arbitrary number of trailing where ... means it can have an arbitrary number of leading dimensions.
dimensions. If img is PIL Image, it is expected to be in mode "L" or "RGB".
threshold (float): All pixels equal or above this value are inverted. threshold (float): All pixels equal or above this value are inverted.
Returns: Returns:
PIL Image or Tensor: Solarized image. PIL Image or Tensor: Solarized image.
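Aside: a combined sketch of `invert`, `posterize`, and `solarize` on a uint8 tensor (values illustrative):

```python
import torch
import torchvision.transforms.functional as F

u8 = torch.randint(0, 256, (3, 64, 64), dtype=torch.uint8)
neg = F.invert(u8)
post = F.posterize(u8, bits=4)       # tensor input must be torch.uint8
sol = F.solarize(u8, threshold=128)  # inverts every pixel >= 128
```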
@@ -1232,10 +1239,12 @@ def solarize(img: Tensor, threshold: float) -> Tensor:
 def adjust_sharpness(img: Tensor, sharpness_factor: float) -> Tensor:
-    """Adjust the sharpness of an Image.
+    """Adjust the sharpness of an image.
     Args:
         img (PIL Image or Tensor): Image to be adjusted.
+            If img is a Tensor, it is expected to be in [..., 1 or 3, H, W] format,
+            where ... means it can have an arbitrary number of leading dimensions.
         sharpness_factor (float): How much to adjust the sharpness. Can be
             any non negative number. 0 gives a blurred image, 1 gives the
             original image while 2 increases the sharpness by a factor of 2.
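Aside: a one-line sketch of the sharpness factor (value illustrative):

```python
import torch
import torchvision.transforms.functional as F

img = torch.rand(3, 64, 64)
crisp = F.adjust_sharpness(img, 2.0)  # 0 blurs, 1 is the identity, 2 sharpens
```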
@@ -1250,15 +1259,15 @@ def adjust_sharpness(img: Tensor, sharpness_factor: float) -> Tensor:
 def autocontrast(img: Tensor) -> Tensor:
-    """Maximize contrast of a PIL Image or torch Tensor by remapping its
+    """Maximize contrast of an image by remapping its
     pixels per channel so that the lowest becomes black and the lightest
     becomes white.
     Args:
         img (PIL Image or Tensor): Image on which autocontrast is applied.
-            If img is a Tensor, it is expected to be in [..., H, W] format,
-            where ... means it can have an arbitrary number of trailing
-            dimensions.
+            If img is a Tensor, it is expected to be in [..., 1 or 3, H, W] format,
+            where ... means it can have an arbitrary number of leading dimensions.
+            If img is PIL Image, it is expected to be in mode "L" or "RGB".
     Returns:
         PIL Image or Tensor: An image that was autocontrasted.
@@ -1270,15 +1279,15 @@ def autocontrast(img: Tensor) -> Tensor:
 def equalize(img: Tensor) -> Tensor:
-    """Equalize the histogram of a PIL Image or torch Tensor by applying
+    """Equalize the histogram of an image by applying
     a non-linear mapping to the input in order to create a uniform
     distribution of grayscale values in the output.
     Args:
         img (PIL Image or Tensor): Image on which equalize is applied.
-            If img is a Tensor, it is expected to be in [..., H, W] format,
-            where ... means it can have an arbitrary number of trailing
-            dimensions.
+            If img is a Tensor, it is expected to be in [..., 1 or 3, H, W] format,
+            where ... means it can have an arbitrary number of leading dimensions.
+            If img is PIL Image, it is expected to be in mode "P", "L" or "RGB".
     Returns:
         PIL Image or Tensor: An image that was equalized.
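Aside: a combined sketch of the two histogram-based helpers above (sizes illustrative):

```python
import torch
import torchvision.transforms.functional as F

u8 = torch.randint(0, 256, (3, 64, 64), dtype=torch.uint8)
stretched = F.autocontrast(u8)  # per channel: darkest -> 0, lightest -> 255
leveled = F.equalize(u8)        # histogram equalization; tensor input must be uint8
```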
...
@@ -35,19 +35,6 @@ def _get_image_num_channels(img: Any) -> int:
 @torch.jit.unused
 def hflip(img):
-    """PRIVATE METHOD. Horizontally flip the given PIL Image.
-    .. warning::
-        Module ``transforms.functional_pil`` is private and should not be used in user application.
-        Please, consider instead using methods from `transforms.functional` module.
-    Args:
-        img (PIL Image): Image to be flipped.
-    Returns:
-        PIL Image: Horizontally flipped image.
-    """
     if not _is_pil_image(img):
         raise TypeError('img should be PIL Image. Got {}'.format(type(img)))
@@ -56,19 +43,6 @@ def hflip(img):
 @torch.jit.unused
 def vflip(img):
-    """PRIVATE METHOD. Vertically flip the given PIL Image.
-    .. warning::
-        Module ``transforms.functional_pil`` is private and should not be used in user application.
-        Please, consider instead using methods from `transforms.functional` module.
-    Args:
-        img (PIL Image): Image to be flipped.
-    Returns:
-        PIL Image: Vertically flipped image.
-    """
     if not _is_pil_image(img):
         raise TypeError('img should be PIL Image. Got {}'.format(type(img)))
@@ -77,22 +51,6 @@ def vflip(img):
 @torch.jit.unused
 def adjust_brightness(img, brightness_factor):
-    """PRIVATE METHOD. Adjust brightness of an RGB image.
-    .. warning::
-        Module ``transforms.functional_pil`` is private and should not be used in user application.
-        Please, consider instead using methods from `transforms.functional` module.
-    Args:
-        img (PIL Image): Image to be adjusted.
-        brightness_factor (float): How much to adjust the brightness. Can be
-            any non negative number. 0 gives a black image, 1 gives the
-            original image while 2 increases the brightness by a factor of 2.
-    Returns:
-        PIL Image: Brightness adjusted image.
-    """
     if not _is_pil_image(img):
         raise TypeError('img should be PIL Image. Got {}'.format(type(img)))
@@ -103,21 +61,6 @@ def adjust_brightness(img, brightness_factor):
 @torch.jit.unused
 def adjust_contrast(img, contrast_factor):
-    """PRIVATE METHOD. Adjust contrast of an Image.
-    .. warning::
-        Module ``transforms.functional_pil`` is private and should not be used in user application.
-        Please, consider instead using methods from `transforms.functional` module.
-    Args:
-        img (PIL Image): PIL Image to be adjusted.
-        contrast_factor (float): How much to adjust the contrast. Can be any
-            non negative number. 0 gives a solid gray image, 1 gives the
-            original image while 2 increases the contrast by a factor of 2.
-    Returns:
-        PIL Image: Contrast adjusted image.
-    """
     if not _is_pil_image(img):
         raise TypeError('img should be PIL Image. Got {}'.format(type(img)))
@@ -128,21 +71,6 @@ def adjust_contrast(img, contrast_factor):
 @torch.jit.unused
 def adjust_saturation(img, saturation_factor):
-    """PRIVATE METHOD. Adjust color saturation of an image.
-    .. warning::
-        Module ``transforms.functional_pil`` is private and should not be used in user application.
-        Please, consider instead using methods from `transforms.functional` module.
-    Args:
-        img (PIL Image): PIL Image to be adjusted.
-        saturation_factor (float): How much to adjust the saturation. 0 will
-            give a black and white image, 1 will give the original image while
-            2 will enhance the saturation by a factor of 2.
-    Returns:
-        PIL Image: Saturation adjusted image.
-    """
     if not _is_pil_image(img):
         raise TypeError('img should be PIL Image. Got {}'.format(type(img)))
@@ -153,35 +81,6 @@ def adjust_saturation(img, saturation_factor):
 @torch.jit.unused
 def adjust_hue(img, hue_factor):
-    """PRIVATE METHOD. Adjust hue of an image.
-    .. warning::
-        Module ``transforms.functional_pil`` is private and should not be used in user application.
-        Please, consider instead using methods from `transforms.functional` module.
-    The image hue is adjusted by converting the image to HSV and
-    cyclically shifting the intensities in the hue channel (H).
-    The image is then converted back to original image mode.
-    `hue_factor` is the amount of shift in H channel and must be in the
-    interval `[-0.5, 0.5]`.
-    See `Hue`_ for more details.
-    .. _Hue: https://en.wikipedia.org/wiki/Hue
-    Args:
-        img (PIL Image): PIL Image to be adjusted.
-        hue_factor (float): How much to shift the hue channel. Should be in
-            [-0.5, 0.5]. 0.5 and -0.5 give complete reversal of hue channel in
-            HSV space in positive and negative direction respectively.
-            0 means no shift. Therefore, both -0.5 and 0.5 will give an image
-            with complementary colors while 0 gives the original image.
-    Returns:
-        PIL Image: Hue adjusted image.
-    """
     if not(-0.5 <= hue_factor <= 0.5):
         raise ValueError('hue_factor ({}) is not in [-0.5, 0.5].'.format(hue_factor))
@@ -206,30 +105,6 @@ def adjust_hue(img, hue_factor):
 @torch.jit.unused
 def adjust_gamma(img, gamma, gain=1):
-    r"""PRIVATE METHOD. Perform gamma correction on an image.
-    .. warning::
-        Module ``transforms.functional_pil`` is private and should not be used in user application.
-        Please, consider instead using methods from `transforms.functional` module.
-    Also known as Power Law Transform. Intensities in RGB mode are adjusted
-    based on the following equation:
-    .. math::
-        I_{\text{out}} = 255 \times \text{gain} \times \left(\frac{I_{\text{in}}}{255}\right)^{\gamma}
-    See `Gamma Correction`_ for more details.
-    .. _Gamma Correction: https://en.wikipedia.org/wiki/Gamma_correction
-    Args:
-        img (PIL Image): PIL Image to be adjusted.
-        gamma (float): Non negative real number, same as :math:`\gamma` in the equation.
-            gamma larger than 1 make the shadows darker,
-            while gamma smaller than 1 make dark regions lighter.
-        gain (float): The constant multiplier.
-    """
     if not _is_pil_image(img):
         raise TypeError('img should be PIL Image. Got {}'.format(type(img)))
@@ -247,44 +122,6 @@ def adjust_gamma(img, gamma, gain=1):
 @torch.jit.unused
 def pad(img, padding, fill=0, padding_mode="constant"):
-    r"""PRIVATE METHOD. Pad the given PIL.Image on all sides with the given "pad" value.
-    .. warning::
-        Module ``transforms.functional_pil`` is private and should not be used in user application.
-        Please, consider instead using methods from `transforms.functional` module.
-    Args:
-        img (PIL Image): Image to be padded.
-        padding (int or tuple or list): Padding on each border. If a single int is provided this
-            is used to pad all borders. If a tuple or list of length 2 is provided this is the padding
-            on left/right and top/bottom respectively. If a tuple or list of length 4 is provided
-            this is the padding for the left, top, right and bottom borders respectively. For compatibility reasons
-            with ``functional_tensor.pad``, if a tuple or list of length 1 is provided, it is interpreted as
-            a single int.
-        fill (int or str or tuple): Pixel fill value for constant fill. Default is 0. If a tuple of
-            length 3, it is used to fill R, G, B channels respectively.
-            This value is only used when the padding_mode is constant.
-        padding_mode: Type of padding. Should be: constant, edge, reflect or symmetric. Default is constant.
-            - constant: pads with a constant value, this value is specified with fill
-            - edge: pads with the last value on the edge of the image
-            - reflect: pads with reflection of image (without repeating the last value on the edge)
-                padding [1, 2, 3, 4] with 2 elements on both sides in reflect mode
-                will result in [3, 2, 1, 2, 3, 4, 3, 2]
-            - symmetric: pads with reflection of image (repeating the last value on the edge)
-                padding [1, 2, 3, 4] with 2 elements on both sides in symmetric mode
-                will result in [2, 1, 1, 2, 3, 4, 4, 3]
-    Returns:
-        PIL Image: Padded image.
-    """
     if not _is_pil_image(img):
         raise TypeError("img should be PIL Image. Got {}".format(type(img)))
@@ -360,23 +197,6 @@ def pad(img, padding, fill=0, padding_mode="constant"):
 @torch.jit.unused
 def crop(img: Image.Image, top: int, left: int, height: int, width: int) -> Image.Image:
-    """PRIVATE METHOD. Crop the given PIL Image.
-    .. warning::
-        Module ``transforms.functional_pil`` is private and should not be used in user application.
-        Please, consider instead using methods from `transforms.functional` module.
-    Args:
-        img (PIL Image): Image to be cropped. (0,0) denotes the top left corner of the image.
-        top (int): Vertical component of the top left corner of the crop box.
-        left (int): Horizontal component of the top left corner of the crop box.
-        height (int): Height of the crop box.
-        width (int): Width of the crop box.
-    Returns:
-        PIL Image: Cropped image.
-    """
     if not _is_pil_image(img):
         raise TypeError('img should be PIL Image. Got {}'.format(type(img)))
@@ -385,27 +205,6 @@ def crop(img: Image.Image, top: int, left: int, height: int, width: int) -> Image.Image:
 @torch.jit.unused
 def resize(img, size, interpolation=Image.BILINEAR):
-    r"""PRIVATE METHOD. Resize the input PIL Image to the given size.
-    .. warning::
-        Module ``transforms.functional_pil`` is private and should not be used in user application.
-        Please, consider instead using methods from `transforms.functional` module.
-    Args:
-        img (PIL Image): Image to be resized.
-        size (sequence or int): Desired output size. If size is a sequence like
-            (h, w), the output size will be matched to this. If size is an int,
-            the smaller edge of the image will be matched to this number maintaining
-            the aspect ratio. i.e, if height > width, then image will be rescaled to
-            :math:`\left(\text{size} \times \frac{\text{height}}{\text{width}}, \text{size}\right)`.
-            For compatibility reasons with ``functional_tensor.resize``, if a tuple or list of length 1 is provided,
-            it is interpreted as a single int.
-        interpolation (int, optional): Desired interpolation. Default is ``PIL.Image.BILINEAR``.
-    Returns:
-        PIL Image: Resized image.
-    """
     if not _is_pil_image(img):
         raise TypeError('img should be PIL Image. Got {}'.format(type(img)))
     if not (isinstance(size, int) or (isinstance(size, Sequence) and len(size) in (1, 2))):
@@ -431,25 +230,7 @@ def resize(img, size, interpolation=Image.BILINEAR):
 @torch.jit.unused
 def _parse_fill(fill, img, min_pil_version, name="fillcolor"):
-    """PRIVATE METHOD. Helper function to get the fill color for rotate, perspective transforms, and pad.
-    .. warning::
-        Module ``transforms.functional_pil`` is private and should not be used in user application.
-        Please, consider instead using methods from `transforms.functional` module.
-    Args:
-        fill (n-tuple or int or float): Pixel fill value for area outside the transformed
-            image. If int or float, the value is used for all bands respectively.
-            Defaults to 0 for all bands.
-        img (PIL Image): Image to be filled.
-        min_pil_version (str): The minimum PILLOW version for when the ``fillcolor`` option
-            was first introduced in the calling function. (e.g. rotate->5.2.0, perspective->5.0.0)
-        name (str): Name of the ``fillcolor`` option in the output. Defaults to ``"fillcolor"``.
-    Returns:
-        dict: kwarg for ``fillcolor``
-    """
+    # Process fill color for affine transforms
     major_found, minor_found = (int(v) for v in PILLOW_VERSION.split('.')[:2])
     major_required, minor_required = (int(v) for v in min_pil_version.split('.')[:2])
     if major_found < major_required or (major_found == major_required and minor_found < minor_required):
@@ -478,25 +259,6 @@ def _parse_fill(fill, img, min_pil_version, name="fillcolor"):
 @torch.jit.unused
 def affine(img, matrix, interpolation=0, fill=None):
-    """PRIVATE METHOD. Apply affine transformation on the PIL Image keeping image center invariant.
-    .. warning::
-        Module ``transforms.functional_pil`` is private and should not be used in user application.
-        Please, consider instead using methods from `transforms.functional` module.
-    Args:
-        img (PIL Image): image to be rotated.
-        matrix (list of floats): list of 6 float values representing inverse matrix for affine transformation.
-        interpolation (``PIL.Image.NEAREST`` or ``PIL.Image.BILINEAR`` or ``PIL.Image.BICUBIC``, optional):
-            An optional resampling filter.
-            See `filters`_ for more information.
-            If omitted, or if the image has mode "1" or "P", it is set to ``PIL.Image.NEAREST``.
-        fill (int): Optional fill color for the area outside the transform in the output image. (Pillow>=5.0.0)
-    Returns:
-        PIL Image: Transformed image.
-    """
     if not _is_pil_image(img):
         raise TypeError('img should be PIL Image. Got {}'.format(type(img)))
@@ -507,36 +269,6 @@ def affine(img, matrix, interpolation=0, fill=None):
 @torch.jit.unused
 def rotate(img, angle, interpolation=0, expand=False, center=None, fill=None):
-    """PRIVATE METHOD. Rotate PIL image by angle.
-    .. warning::
-        Module ``transforms.functional_pil`` is private and should not be used in user application.
-        Please, consider instead using methods from `transforms.functional` module.
-    Args:
-        img (PIL Image): image to be rotated.
-        angle (float or int): rotation angle value in degrees, counter-clockwise.
-        interpolation (``PIL.Image.NEAREST`` or ``PIL.Image.BILINEAR`` or ``PIL.Image.BICUBIC``, optional):
-            An optional resampling filter. See `filters`_ for more information.
-            If omitted, or if the image has mode "1" or "P", it is set to ``PIL.Image.NEAREST``.
-        expand (bool, optional): Optional expansion flag.
-            If true, expands the output image to make it large enough to hold the entire rotated image.
-            If false or omitted, make the output image the same size as the input image.
-            Note that the expand flag assumes rotation around the center and no translation.
-        center (2-tuple, optional): Optional center of rotation.
-            Origin is the upper left corner.
-            Default is the center of the image.
-        fill (n-tuple or int or float): Pixel fill value for area outside the rotated
-            image. If int or float, the value is used for all bands respectively.
-            Defaults to 0 for all bands. This option is only available for ``pillow>=5.2.0``.
-    Returns:
-        PIL Image: Rotated image.
-    .. _filters: https://pillow.readthedocs.io/en/latest/handbook/concepts.html#filters
-    """
     if not _is_pil_image(img):
         raise TypeError("img should be PIL Image. Got {}".format(type(img)))
...@@ -546,25 +278,6 @@ def rotate(img, angle, interpolation=0, expand=False, center=None, fill=None): ...@@ -546,25 +278,6 @@ def rotate(img, angle, interpolation=0, expand=False, center=None, fill=None):
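The effect of the ``expand`` flag documented above, via the public wrapper (sizes illustrative; ``fill`` needs Pillow>=5.2.0 per the docstring):

import torchvision.transforms.functional as F
from PIL import Image

img = Image.new("RGB", (64, 32))
out = F.rotate(img, angle=45.0, expand=True, fill=0)
print(out.size)  # larger than (64, 32): the canvas grows to hold every rotated pixel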
@torch.jit.unused @torch.jit.unused
def perspective(img, perspective_coeffs, interpolation=Image.BICUBIC, fill=None): def perspective(img, perspective_coeffs, interpolation=Image.BICUBIC, fill=None):
"""PRIVATE METHOD. Perform perspective transform of the given PIL Image.
.. warning::
Module ``transforms.functional_pil`` is private and should not be used in user application.
Please, consider instead using methods from `transforms.functional` module.
Args:
img (PIL Image): Image to be transformed.
perspective_coeffs (list of float): perspective transformation coefficients.
interpolation (int): Interpolation type. Default, ``Image.BICUBIC``.
fill (n-tuple or int or float): Pixel fill value for area outside the rotated
image. If int or float, the value is used for all bands respectively.
This option is only available for ``pillow>=5.0.0``.
Returns:
PIL Image: Perspectively transformed Image.
"""
if not _is_pil_image(img): if not _is_pil_image(img):
raise TypeError('img should be PIL Image. Got {}'.format(type(img))) raise TypeError('img should be PIL Image. Got {}'.format(type(img)))
...@@ -575,23 +288,6 @@ def perspective(img, perspective_coeffs, interpolation=Image.BICUBIC, fill=None) ...@@ -575,23 +288,6 @@ def perspective(img, perspective_coeffs, interpolation=Image.BICUBIC, fill=None)
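The eight coefficients above are usually derived from four corner correspondences; the public wrapper takes the point form directly (points illustrative):

import torchvision.transforms.functional as F
from PIL import Image

img = Image.new("RGB", (32, 32))
start = [[0, 0], [31, 0], [31, 31], [0, 31]]  # source corners
end = [[2, 1], [29, 3], [30, 30], [1, 28]]    # where those corners should land
out = F.perspective(img, startpoints=start, endpoints=end)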
@torch.jit.unused @torch.jit.unused
def to_grayscale(img, num_output_channels): def to_grayscale(img, num_output_channels):
"""PRIVATE METHOD. Convert PIL image of any mode (RGB, HSV, LAB, etc) to grayscale version of image.
.. warning::
Module ``transforms.functional_pil`` is private and should not be used in user application.
Please, consider instead using methods from `transforms.functional` module.
Args:
img (PIL Image): Image to be converted to grayscale.
num_output_channels (int): number of channels of the output image. Value can be 1 or 3. Default, 1.
Returns:
PIL Image: Grayscale version of the image.
if num_output_channels = 1 : returned image is single channel
if num_output_channels = 3 : returned image is 3 channel with r = g = b
"""
if not _is_pil_image(img): if not _is_pil_image(img):
raise TypeError('img should be PIL Image. Got {}'.format(type(img))) raise TypeError('img should be PIL Image. Got {}'.format(type(img)))
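A short sketch of the two output modes described in the removed docstring:

import torchvision.transforms.functional as F
from PIL import Image

img = Image.new("RGB", (16, 16), color=(255, 0, 0))
g1 = F.to_grayscale(img, num_output_channels=1)  # single channel, mode "L"
g3 = F.to_grayscale(img, num_output_channels=3)  # 3 channels with r == g == b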
......
...@@ -17,7 +17,7 @@ def _assert_image_tensor(img): ...@@ -17,7 +17,7 @@ def _assert_image_tensor(img):
def _get_image_size(img: Tensor) -> List[int]: def _get_image_size(img: Tensor) -> List[int]:
"""Returns (w, h) of tensor image""" # Returns (w, h) of tensor image
_assert_image_tensor(img) _assert_image_tensor(img)
return [img.shape[-1], img.shape[-2]] return [img.shape[-1], img.shape[-2]]
...@@ -56,31 +56,6 @@ def _assert_channels(img: Tensor, permitted: List[int]) -> None: ...@@ -56,31 +56,6 @@ def _assert_channels(img: Tensor, permitted: List[int]) -> None:
def convert_image_dtype(image: torch.Tensor, dtype: torch.dtype = torch.float) -> torch.Tensor: def convert_image_dtype(image: torch.Tensor, dtype: torch.dtype = torch.float) -> torch.Tensor:
"""PRIVATE METHOD. Convert a tensor image to the given ``dtype`` and scale the values accordingly
.. warning::
Module ``transforms.functional_tensor`` is private and should not be used in user application.
Please, consider instead using methods from `transforms.functional` module.
Args:
image (torch.Tensor): Image to be converted
dtype (torch.dtype): Desired data type of the output
Returns:
(torch.Tensor): Converted image
.. note::
When converting from a smaller to a larger integer ``dtype`` the maximum values are **not** mapped exactly.
If converted back and forth, this mismatch has no effect.
Raises:
RuntimeError: When trying to cast :class:`torch.float32` to :class:`torch.int32` or :class:`torch.int64` as
well as for trying to cast :class:`torch.float64` to :class:`torch.int64`. These conversions might lead to
overflow errors since the floating point ``dtype`` cannot store consecutive integers over the whole range
of the integer ``dtype``.
"""
if image.dtype == dtype: if image.dtype == dtype:
return image return image
...@@ -134,88 +109,24 @@ def convert_image_dtype(image: torch.Tensor, dtype: torch.dtype = torch.float) - ...@@ -134,88 +109,24 @@ def convert_image_dtype(image: torch.Tensor, dtype: torch.dtype = torch.float) -
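A small sketch of the value scaling described in the removed docstring:

import torch
import torchvision.transforms.functional as F

u8 = torch.tensor([[[0, 128, 255]]], dtype=torch.uint8)
f32 = F.convert_image_dtype(u8, torch.float32)  # rescaled into [0.0, 1.0]
back = F.convert_image_dtype(f32, torch.uint8)  # round-trips to 0, 128, 255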
def vflip(img: Tensor) -> Tensor: def vflip(img: Tensor) -> Tensor:
"""PRIVATE METHOD. Vertically flip the given the Image Tensor.
.. warning::
Module ``transforms.functional_tensor`` is private and should not be used in user application.
Please, consider instead using methods from `transforms.functional` module.
Args:
img (Tensor): Image Tensor to be flipped in the form [..., C, H, W].
Returns:
Tensor: Vertically flipped image Tensor.
"""
_assert_image_tensor(img) _assert_image_tensor(img)
return img.flip(-2) return img.flip(-2)
def hflip(img: Tensor) -> Tensor: def hflip(img: Tensor) -> Tensor:
"""PRIVATE METHOD. Horizontally flip the given the Image Tensor.
.. warning::
Module ``transforms.functional_tensor`` is private and should not be used in user application.
Please, consider instead using methods from `transforms.functional` module.
Args:
img (Tensor): Image Tensor to be flipped in the form [..., C, H, W].
Returns:
Tensor: Horizontally flipped image Tensor.
"""
_assert_image_tensor(img) _assert_image_tensor(img)
return img.flip(-1) return img.flip(-1)
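Both flips are plain dimension reversals, as the one-line bodies above show:

import torch

img = torch.arange(6).reshape(1, 2, 3)  # a tiny one-channel 2x3 image
v = img.flip(-2)  # vertical: rows swapped -> [[3, 4, 5], [0, 1, 2]]
h = img.flip(-1)  # horizontal: each row reversed -> [[2, 1, 0], [5, 4, 3]]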
def crop(img: Tensor, top: int, left: int, height: int, width: int) -> Tensor: def crop(img: Tensor, top: int, left: int, height: int, width: int) -> Tensor:
"""PRIVATE METHOD. Crop the given Image Tensor.
.. warning::
Module ``transforms.functional_tensor`` is private and should not be used in user application.
Please, consider instead using methods from `transforms.functional` module.
Args:
img (Tensor): Image to be cropped in the form [..., H, W]. (0,0) denotes the top left corner of the image.
top (int): Vertical component of the top left corner of the crop box.
left (int): Horizontal component of the top left corner of the crop box.
height (int): Height of the crop box.
width (int): Width of the crop box.
Returns:
Tensor: Cropped image.
"""
_assert_image_tensor(img) _assert_image_tensor(img)
return img[..., top:top + height, left:left + width] return img[..., top:top + height, left:left + width]
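Since the crop is pure slicing, it works for any number of leading dimensions:

import torch

img = torch.arange(25, dtype=torch.uint8).reshape(1, 5, 5)
patch = img[..., 1:1 + 2, 2:2 + 3]  # top=1, left=2, height=2, width=3
print(patch.shape)  # torch.Size([1, 2, 3])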
def rgb_to_grayscale(img: Tensor, num_output_channels: int = 1) -> Tensor: def rgb_to_grayscale(img: Tensor, num_output_channels: int = 1) -> Tensor:
"""PRIVATE METHOD. Convert the given RGB Image Tensor to Grayscale.
.. warning::
Module ``transforms.functional_tensor`` is private and should not be used in user application.
Please, consider instead using methods from `transforms.functional` module.
For RGB to Grayscale conversion, ITU-R 601-2 luma transform is performed which
is L = R * 0.2989 + G * 0.5870 + B * 0.1140
Args:
img (Tensor): Image to be converted to Grayscale in the form [C, H, W].
num_output_channels (int): number of channels of the output image. Value can be 1 or 3. Default, 1.
Returns:
Tensor: Grayscale version of the image.
if num_output_channels = 1 : returned image is single channel
if num_output_channels = 3 : returned image is 3 channel with r = g = b
"""
if img.ndim < 3: if img.ndim < 3:
raise TypeError("Input image tensor should have at least 3 dimensions, but found {}".format(img.ndim)) raise TypeError("Input image tensor should have at least 3 dimensions, but found {}".format(img.ndim))
_assert_channels(img, [3]) _assert_channels(img, [3])
...@@ -236,22 +147,6 @@ def rgb_to_grayscale(img: Tensor, num_output_channels: int = 1) -> Tensor: ...@@ -236,22 +147,6 @@ def rgb_to_grayscale(img: Tensor, num_output_channels: int = 1) -> Tensor:
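The ITU-R 601-2 luma transform named above, written out by hand (a sketch equivalent to what the elided body computes):

import torch

img = torch.rand(3, 4, 4)  # an RGB tensor image in [0, 1]
r, g, b = img.unbind(dim=-3)
gray = (0.2989 * r + 0.587 * g + 0.114 * b).unsqueeze(dim=-3)
print(gray.shape)  # torch.Size([1, 4, 4])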
def adjust_brightness(img: Tensor, brightness_factor: float) -> Tensor: def adjust_brightness(img: Tensor, brightness_factor: float) -> Tensor:
"""PRIVATE METHOD. Adjust brightness of a Grayscale or RGB image.
.. warning::
Module ``transforms.functional_tensor`` is private and should not be used in user application.
Please, consider instead using methods from `transforms.functional` module.
Args:
img (Tensor): Image to be adjusted.
brightness_factor (float): How much to adjust the brightness. Can be
any non negative number. 0 gives a black image, 1 gives the
original image while 2 increases the brightness by a factor of 2.
Returns:
Tensor: Brightness adjusted image.
"""
if brightness_factor < 0: if brightness_factor < 0:
raise ValueError('brightness_factor ({}) is not non-negative.'.format(brightness_factor)) raise ValueError('brightness_factor ({}) is not non-negative.'.format(brightness_factor))
...@@ -263,22 +158,6 @@ def adjust_brightness(img: Tensor, brightness_factor: float) -> Tensor: ...@@ -263,22 +158,6 @@ def adjust_brightness(img: Tensor, brightness_factor: float) -> Tensor:
def adjust_contrast(img: Tensor, contrast_factor: float) -> Tensor: def adjust_contrast(img: Tensor, contrast_factor: float) -> Tensor:
"""PRIVATE METHOD. Adjust contrast of an RGB image.
.. warning::
Module ``transforms.functional_tensor`` is private and should not be used in user application.
Please, consider instead using methods from `transforms.functional` module.
Args:
img (Tensor): Image to be adjusted.
contrast_factor (float): How much to adjust the contrast. Can be any
non negative number. 0 gives a solid gray image, 1 gives the
original image while 2 increases the contrast by a factor of 2.
Returns:
Tensor: Contrast adjusted image.
"""
if contrast_factor < 0: if contrast_factor < 0:
raise ValueError('contrast_factor ({}) is not non-negative.'.format(contrast_factor)) raise ValueError('contrast_factor ({}) is not non-negative.'.format(contrast_factor))
...@@ -293,35 +172,6 @@ def adjust_contrast(img: Tensor, contrast_factor: float) -> Tensor: ...@@ -293,35 +172,6 @@ def adjust_contrast(img: Tensor, contrast_factor: float) -> Tensor:
def adjust_hue(img: Tensor, hue_factor: float) -> Tensor: def adjust_hue(img: Tensor, hue_factor: float) -> Tensor:
"""PRIVATE METHOD. Adjust hue of an RGB image.
.. warning::
Module ``transforms.functional_tensor`` is private and should not be used in user application.
Please, consider instead using methods from `transforms.functional` module.
The image hue is adjusted by converting the image to HSV and
cyclically shifting the intensities in the hue channel (H).
The image is then converted back to original image mode.
`hue_factor` is the amount of shift in H channel and must be in the
interval `[-0.5, 0.5]`.
See `Hue`_ for more details.
.. _Hue: https://en.wikipedia.org/wiki/Hue
Args:
img (Tensor): Image to be adjusted. Image type is either uint8 or float.
hue_factor (float): How much to shift the hue channel. Should be in
[-0.5, 0.5]. 0.5 and -0.5 give complete reversal of hue channel in
HSV space in positive and negative direction respectively.
0 means no shift. Therefore, both -0.5 and 0.5 will give an image
with complementary colors while 0 gives the original image.
Returns:
Tensor: Hue adjusted image.
"""
if not (-0.5 <= hue_factor <= 0.5): if not (-0.5 <= hue_factor <= 0.5):
raise ValueError('hue_factor ({}) is not in [-0.5, 0.5].'.format(hue_factor)) raise ValueError('hue_factor ({}) is not in [-0.5, 0.5].'.format(hue_factor))
...@@ -349,22 +199,6 @@ def adjust_hue(img: Tensor, hue_factor: float) -> Tensor: ...@@ -349,22 +199,6 @@ def adjust_hue(img: Tensor, hue_factor: float) -> Tensor:
def adjust_saturation(img: Tensor, saturation_factor: float) -> Tensor: def adjust_saturation(img: Tensor, saturation_factor: float) -> Tensor:
"""PRIVATE METHOD. Adjust color saturation of an RGB image.
.. warning::
Module ``transforms.functional_tensor`` is private and should not be used in user application.
Please, consider instead using methods from `transforms.functional` module.
Args:
img (Tensor): Image to be adjusted.
saturation_factor (float): How much to adjust the saturation. Can be any
non negative number. 0 gives a black and white image, 1 gives the
original image while 2 enhances the saturation by a factor of 2.
Returns:
Tensor: Saturation adjusted image.
"""
if saturation_factor < 0: if saturation_factor < 0:
raise ValueError('saturation_factor ({}) is not non-negative.'.format(saturation_factor)) raise ValueError('saturation_factor ({}) is not non-negative.'.format(saturation_factor))
...@@ -376,31 +210,6 @@ def adjust_saturation(img: Tensor, saturation_factor: float) -> Tensor: ...@@ -376,31 +210,6 @@ def adjust_saturation(img: Tensor, saturation_factor: float) -> Tensor:
def adjust_gamma(img: Tensor, gamma: float, gain: float = 1) -> Tensor: def adjust_gamma(img: Tensor, gamma: float, gain: float = 1) -> Tensor:
r"""PRIVATE METHOD. Adjust gamma of a Grayscale or RGB image.
.. warning::
Module ``transforms.functional_tensor`` is private and should not be used in user application.
Please, consider instead using methods from `transforms.functional` module.
Also known as Power Law Transform. Intensities in RGB mode are adjusted
based on the following equation:
.. math::
I_{\text{out}} = 255 \times \text{gain} \times \left(\frac{I_{\text{in}}}{255}\right)^{\gamma}
See `Gamma Correction`_ for more details.
.. _Gamma Correction: https://en.wikipedia.org/wiki/Gamma_correction
Args:
img (Tensor): Tensor of RGB values to be adjusted.
gamma (float): Non negative real number, same as :math:`\gamma` in the equation.
gamma larger than 1 makes the shadows darker,
while gamma smaller than 1 makes dark regions lighter.
gain (float): The constant multiplier.
"""
if not isinstance(img, torch.Tensor): if not isinstance(img, torch.Tensor):
raise TypeError('Input img should be a Tensor.') raise TypeError('Input img should be a Tensor.')
...@@ -422,25 +231,7 @@ def adjust_gamma(img: Tensor, gamma: float, gain: float = 1) -> Tensor: ...@@ -422,25 +231,7 @@ def adjust_gamma(img: Tensor, gamma: float, gain: float = 1) -> Tensor:
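Two worked values for the power-law equation above (outputs approximate because of uint8 rounding):

import torch
import torchvision.transforms.functional as F

img = torch.full((1, 2, 2), 128, dtype=torch.uint8)
dark = F.adjust_gamma(img, gamma=2.0)   # 255 * (128/255)**2   -> about 64
light = F.adjust_gamma(img, gamma=0.5)  # 255 * (128/255)**0.5 -> about 181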
def center_crop(img: Tensor, output_size: BroadcastingList2[int]) -> Tensor: def center_crop(img: Tensor, output_size: BroadcastingList2[int]) -> Tensor:
"""DEPRECATED. Crop the Image Tensor and resize it to desired size. """DEPRECATED
.. warning::
Module ``transforms.functional_tensor`` is private and should not be used in user application.
Please, consider instead using methods from `transforms.functional` module.
.. warning::
This method is deprecated and will be removed in future releases.
Please, use ``F.center_crop`` instead.
Args:
img (Tensor): Image to be cropped.
output_size (sequence or int): (height, width) of the crop box. If int,
it is used for both directions
Returns:
Tensor: Cropped image.
""" """
warnings.warn( warnings.warn(
"This method is deprecated and will be removed in future releases. " "This method is deprecated and will be removed in future releases. "
...@@ -464,32 +255,7 @@ def center_crop(img: Tensor, output_size: BroadcastingList2[int]) -> Tensor: ...@@ -464,32 +255,7 @@ def center_crop(img: Tensor, output_size: BroadcastingList2[int]) -> Tensor:
def five_crop(img: Tensor, size: BroadcastingList2[int]) -> List[Tensor]: def five_crop(img: Tensor, size: BroadcastingList2[int]) -> List[Tensor]:
"""DEPRECATED. Crop the given Image Tensor into four corners and the central crop. """DEPRECATED
.. warning::
Module ``transforms.functional_tensor`` is private and should not be used in user application.
Please, consider instead using methods from `transforms.functional` module.
.. warning::
This method is deprecated and will be removed in future releases.
Please, use ``F.five_crop`` instead.
.. Note::
This transform returns a List of Tensors and there may be a
mismatch in the number of inputs and targets your ``Dataset`` returns.
Args:
img (Tensor): Image to be cropped.
size (sequence or int): Desired output size of the crop. If size is an
int instead of sequence like (h, w), a square crop (size, size) is
made.
Returns:
List: List (tl, tr, bl, br, center)
Corresponding top left, top right, bottom left, bottom right and center crop.
""" """
warnings.warn( warnings.warn(
"This method is deprecated and will be removed in future releases. " "This method is deprecated and will be removed in future releases. "
...@@ -516,35 +282,7 @@ def five_crop(img: Tensor, size: BroadcastingList2[int]) -> List[Tensor]: ...@@ -516,35 +282,7 @@ def five_crop(img: Tensor, size: BroadcastingList2[int]) -> List[Tensor]:
def ten_crop(img: Tensor, size: BroadcastingList2[int], vertical_flip: bool = False) -> List[Tensor]: def ten_crop(img: Tensor, size: BroadcastingList2[int], vertical_flip: bool = False) -> List[Tensor]:
"""DEPRECATED. Crop the given Image Tensor into four corners and the central crop plus the """DEPRECATED
flipped version of these (horizontal flipping is used by default).
.. warning::
Module ``transforms.functional_tensor`` is private and should not be used in user application.
Please, consider instead using methods from `transforms.functional` module.
.. warning::
This method is deprecated and will be removed in future releases.
Please, use ``F.ten_crop`` instead.
.. Note::
This transform returns a List of images and there may be a
mismatch in the number of inputs and targets your ``Dataset`` returns.
Args:
img (Tensor): Image to be cropped.
size (sequence or int): Desired output size of the crop. If size is an
int instead of sequence like (h, w), a square crop (size, size) is
made.
vertical_flip (bool): Use vertical flipping instead of horizontal
Returns:
List: List (tl, tr, bl, br, center, tl_flip, tr_flip, bl_flip, br_flip, center_flip)
Corresponding top left, top right, bottom left, bottom right and center crop
and same for the flipped image's tensor.
""" """
warnings.warn( warnings.warn(
"This method is deprecated and will be removed in future releases. " "This method is deprecated and will be removed in future releases. "
...@@ -663,43 +401,6 @@ def _pad_symmetric(img: Tensor, padding: List[int]) -> Tensor: ...@@ -663,43 +401,6 @@ def _pad_symmetric(img: Tensor, padding: List[int]) -> Tensor:
def pad(img: Tensor, padding: List[int], fill: int = 0, padding_mode: str = "constant") -> Tensor: def pad(img: Tensor, padding: List[int], fill: int = 0, padding_mode: str = "constant") -> Tensor:
r"""PRIVATE METHOD. Pad the given Tensor Image on all sides with specified padding mode and fill value.
.. warning::
Module ``transforms.functional_tensor`` is private and should not be used in user application.
Please, consider instead using methods from `transforms.functional` module.
Args:
img (Tensor): Image to be padded.
padding (int or tuple or list): Padding on each border. If a single int is provided this
is used to pad all borders. If a tuple or list of length 2 is provided this is the padding
on left/right and top/bottom respectively. If a tuple or list of length 4 is provided
this is the padding for the left, top, right and bottom borders
respectively. In torchscript mode padding as single int is not supported, use a tuple or
list of length 1: ``[padding, ]``.
fill (int): Pixel fill value for constant fill. Default is 0.
This value is only used when the padding_mode is constant
padding_mode (str): Type of padding. Should be: constant, edge or reflect. Default is constant.
Mode symmetric is not yet supported for Tensor inputs.
- constant: pads with a constant value, this value is specified with fill
- edge: pads with the last value on the edge of the image
- reflect: pads with reflection of image (without repeating the last value on the edge)
padding [1, 2, 3, 4] with 2 elements on both sides in reflect mode
will result in [3, 2, 1, 2, 3, 4, 3, 2]
- symmetric: pads with reflection of image (repeating the last value on the edge)
padding [1, 2, 3, 4] with 2 elements on both sides in symmetric mode
will result in [2, 1, 1, 2, 3, 4, 4, 3]
Returns:
Tensor: Padded image.
"""
_assert_image_tensor(img) _assert_image_tensor(img)
if not isinstance(padding, (int, tuple, list)): if not isinstance(padding, (int, tuple, list)):
...@@ -770,28 +471,6 @@ def pad(img: Tensor, padding: List[int], fill: int = 0, padding_mode: str = "con ...@@ -770,28 +471,6 @@ def pad(img: Tensor, padding: List[int], fill: int = 0, padding_mode: str = "con
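The reflect/symmetric example from the docstring, reproduced on a real tensor (assuming the tensor backend at this revision supports both modes, given ``_pad_symmetric`` above):

import torch
import torchvision.transforms.functional as F

row = torch.tensor([[[1., 2., 3., 4.]]])      # shape [1, 1, 4]
F.pad(row, [2, 0], padding_mode="reflect")    # -> [3, 2, 1, 2, 3, 4, 3, 2]
F.pad(row, [2, 0], padding_mode="symmetric")  # -> [2, 1, 1, 2, 3, 4, 4, 3]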
def resize(img: Tensor, size: List[int], interpolation: str = "bilinear") -> Tensor: def resize(img: Tensor, size: List[int], interpolation: str = "bilinear") -> Tensor:
r"""PRIVATE METHOD. Resize the input Tensor to the given size.
.. warning::
Module ``transforms.functional_tensor`` is private and should not be used in user application.
Please, consider instead using methods from `transforms.functional` module.
Args:
img (Tensor): Image to be resized.
size (int or tuple or list): Desired output size. If size is a sequence like
(h, w), the output size will be matched to this. If size is an int,
the smaller edge of the image will be matched to this number maintaining
the aspect ratio. i.e, if height > width, then image will be rescaled to
:math:`\left(\text{size} \times \frac{\text{height}}{\text{width}}, \text{size}\right)`.
In torchscript mode padding as a single int is not supported, use a tuple or
list of length 1: ``[size, ]``.
interpolation (str): Desired interpolation. Default is "bilinear". Other supported values:
"nearest" and "bicubic".
Returns:
Tensor: Resized image.
"""
_assert_image_tensor(img) _assert_image_tensor(img)
if not isinstance(size, (int, tuple, list)): if not isinstance(size, (int, tuple, list)):
...@@ -965,23 +644,6 @@ def _gen_affine_grid( ...@@ -965,23 +644,6 @@ def _gen_affine_grid(
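The single-int (smaller edge) behaviour described above, on a tensor input (sizes illustrative):

import torch
import torchvision.transforms.functional as F

img = torch.rand(3, 60, 40)
out = F.resize(img, [50])       # smaller edge -> 50, aspect kept: 3 x 75 x 50
out2 = F.resize(img, [32, 32])  # exact (h, w)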
def affine( def affine(
img: Tensor, matrix: List[float], interpolation: str = "nearest", fill: Optional[List[float]] = None img: Tensor, matrix: List[float], interpolation: str = "nearest", fill: Optional[List[float]] = None
) -> Tensor: ) -> Tensor:
"""PRIVATE METHOD. Apply affine transformation on the Tensor image keeping image center invariant.
.. warning::
Module ``transforms.functional_tensor`` is private and should not be used in user application.
Please, consider instead using methods from `transforms.functional` module.
Args:
img (Tensor): image to be rotated.
matrix (list of floats): list of 6 float values representing inverse matrix for affine transformation.
interpolation (str): An optional resampling filter. Default is "nearest". Other supported values: "bilinear".
fill (sequence or int or float, optional): Optional fill value, default None.
If None, fill with 0.
Returns:
Tensor: Transformed image.
"""
_assert_grid_transform_inputs(img, matrix, interpolation, fill, ["nearest", "bilinear"]) _assert_grid_transform_inputs(img, matrix, interpolation, fill, ["nearest", "bilinear"])
dtype = img.dtype if torch.is_floating_point(img) else torch.float32 dtype = img.dtype if torch.is_floating_point(img) else torch.float32
...@@ -1021,31 +683,6 @@ def rotate( ...@@ -1021,31 +683,6 @@ def rotate(
img: Tensor, matrix: List[float], interpolation: str = "nearest", img: Tensor, matrix: List[float], interpolation: str = "nearest",
expand: bool = False, fill: Optional[List[float]] = None expand: bool = False, fill: Optional[List[float]] = None
) -> Tensor: ) -> Tensor:
"""PRIVATE METHOD. Rotate the Tensor image by angle.
.. warning::
Module ``transforms.functional_tensor`` is private and should not be used in user application.
Please, consider instead using methods from `transforms.functional` module.
Args:
img (Tensor): image to be rotated.
matrix (list of floats): list of 6 float values representing inverse matrix for rotation transformation.
Translation part (``matrix[2]`` and ``matrix[5]``) should be in pixel coordinates.
interpolation (str): An optional resampling filter. Default is "nearest". Other supported values: "bilinear".
expand (bool, optional): Optional expansion flag.
If true, expands the output image to make it large enough to hold the entire rotated image.
If false or omitted, make the output image the same size as the input image.
Note that the expand flag assumes rotation around the center and no translation.
fill (sequence or int or float, optional): Optional fill value, default None.
If None, fill with 0.
Returns:
Tensor: Rotated image.
.. _filters: https://pillow.readthedocs.io/en/latest/handbook/concepts.html#filters
"""
_assert_grid_transform_inputs(img, matrix, interpolation, fill, ["nearest", "bilinear"]) _assert_grid_transform_inputs(img, matrix, interpolation, fill, ["nearest", "bilinear"])
w, h = img.shape[-1], img.shape[-2] w, h = img.shape[-1], img.shape[-2]
ow, oh = _compute_output_size(matrix, w, h) if expand else (w, h) ow, oh = _compute_output_size(matrix, w, h) if expand else (w, h)
...@@ -1093,24 +730,6 @@ def _perspective_grid(coeffs: List[float], ow: int, oh: int, dtype: torch.dtype, ...@@ -1093,24 +730,6 @@ def _perspective_grid(coeffs: List[float], ow: int, oh: int, dtype: torch.dtype,
def perspective( def perspective(
img: Tensor, perspective_coeffs: List[float], interpolation: str = "bilinear", fill: Optional[List[float]] = None img: Tensor, perspective_coeffs: List[float], interpolation: str = "bilinear", fill: Optional[List[float]] = None
) -> Tensor: ) -> Tensor:
"""PRIVATE METHOD. Perform perspective transform of the given Tensor image.
.. warning::
Module ``transforms.functional_tensor`` is private and should not be used in user application.
Please, consider instead using methods from `transforms.functional` module.
Args:
img (Tensor): Image to be transformed.
perspective_coeffs (list of float): perspective transformation coefficients.
interpolation (str): Interpolation type. Default, "bilinear".
fill (sequence or int or float, optional): Optional fill value, default None.
If None, fill with 0.
Returns:
Tensor: transformed image.
"""
if not (isinstance(img, torch.Tensor)): if not (isinstance(img, torch.Tensor)):
raise TypeError('Input img should be Tensor.') raise TypeError('Input img should be Tensor.')
...@@ -1151,22 +770,6 @@ def _get_gaussian_kernel2d( ...@@ -1151,22 +770,6 @@ def _get_gaussian_kernel2d(
def gaussian_blur(img: Tensor, kernel_size: List[int], sigma: List[float]) -> Tensor: def gaussian_blur(img: Tensor, kernel_size: List[int], sigma: List[float]) -> Tensor:
"""PRIVATE METHOD. Performs Gaussian blurring on the img by given kernel.
.. warning::
Module ``transforms.functional_tensor`` is private and should not be used in user application.
Please, consider instead using methods from `transforms.functional` module.
Args:
img (Tensor): Image to be blurred
kernel_size (sequence of int or int): Kernel size of the Gaussian kernel ``(kx, ky)``.
sigma (sequence of float or float, optional): Standard deviation of the Gaussian kernel ``(sx, sy)``.
Returns:
Tensor: An image that is blurred using gaussian kernel of given parameters
"""
if not (isinstance(img, torch.Tensor)): if not (isinstance(img, torch.Tensor)):
raise TypeError('img should be Tensor. Got {}'.format(type(img))) raise TypeError('img should be Tensor. Got {}'.format(type(img)))
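A minimal call through the public wrapper (kernel sizes must be odd and positive; sigma may be given per axis):

import torch
import torchvision.transforms.functional as F

img = torch.rand(3, 32, 32)
out = F.gaussian_blur(img, kernel_size=[5, 5], sigma=[1.0, 1.0])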
......
...@@ -122,6 +122,7 @@ class PILToTensor: ...@@ -122,6 +122,7 @@ class PILToTensor:
class ConvertImageDtype(torch.nn.Module): class ConvertImageDtype(torch.nn.Module):
"""Convert a tensor image to the given ``dtype`` and scale the values accordingly """Convert a tensor image to the given ``dtype`` and scale the values accordingly
This function does not support PIL Image.
Args: Args:
dtype (torch.dtype): Desired data type of the output dtype (torch.dtype): Desired data type of the output
...@@ -187,6 +188,7 @@ class ToPILImage: ...@@ -187,6 +188,7 @@ class ToPILImage:
class Normalize(torch.nn.Module): class Normalize(torch.nn.Module):
"""Normalize a tensor image with mean and standard deviation. """Normalize a tensor image with mean and standard deviation.
This transform does not support PIL Image.
Given mean: ``(mean[1],...,mean[n])`` and std: ``(std[1],..,std[n])`` for ``n`` Given mean: ``(mean[1],...,mean[n])`` and std: ``(std[1],..,std[n])`` for ``n``
channels, this transform will normalize each channel of the input channels, this transform will normalize each channel of the input
``torch.*Tensor`` i.e., ``torch.*Tensor`` i.e.,
...@@ -224,7 +226,7 @@ class Normalize(torch.nn.Module): ...@@ -224,7 +226,7 @@ class Normalize(torch.nn.Module):
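A quick sketch of the per-channel normalization (the mean/std values are the common ImageNet statistics, used here only as an example):

import torch
from torchvision import transforms

norm = transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
out = norm(torch.rand(3, 8, 8))  # each channel: (input - mean) / std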
class Resize(torch.nn.Module): class Resize(torch.nn.Module):
"""Resize the input image to the given size. """Resize the input image to the given size.
The image can be a PIL Image or a torch Tensor, in which case it is expected If the image is a torch Tensor, it is expected
to have [..., H, W] shape, where ... means an arbitrary number of leading dimensions to have [..., H, W] shape, where ... means an arbitrary number of leading dimensions
Args: Args:
...@@ -233,8 +235,7 @@ class Resize(torch.nn.Module): ...@@ -233,8 +235,7 @@ class Resize(torch.nn.Module):
smaller edge of the image will be matched to this number. smaller edge of the image will be matched to this number.
i.e, if height > width, then image will be rescaled to i.e, if height > width, then image will be rescaled to
(size * height / width, size). (size * height / width, size).
In torchscript mode padding as single int is not supported, use a tuple or In torchscript mode size as single int is not supported, use a sequence of length 1: ``[size, ]``.
list of length 1: ``[size, ]``.
interpolation (InterpolationMode): Desired interpolation enum defined by interpolation (InterpolationMode): Desired interpolation enum defined by
:class:`torchvision.transforms.InterpolationMode`. Default is ``InterpolationMode.BILINEAR``. :class:`torchvision.transforms.InterpolationMode`. Default is ``InterpolationMode.BILINEAR``.
If input is Tensor, only ``InterpolationMode.NEAREST``, ``InterpolationMode.BILINEAR`` and If input is Tensor, only ``InterpolationMode.NEAREST``, ``InterpolationMode.BILINEAR`` and
...@@ -288,13 +289,13 @@ class Scale(Resize): ...@@ -288,13 +289,13 @@ class Scale(Resize):
class CenterCrop(torch.nn.Module): class CenterCrop(torch.nn.Module):
"""Crops the given image at the center. """Crops the given image at the center.
The image can be a PIL Image or a torch Tensor, in which case it is expected If the image is a torch Tensor, it is expected
to have [..., H, W] shape, where ... means an arbitrary number of leading dimensions to have [..., H, W] shape, where ... means an arbitrary number of leading dimensions
Args: Args:
size (sequence or int): Desired output size of the crop. If size is an size (sequence or int): Desired output size of the crop. If size is an
int instead of sequence like (h, w), a square crop (size, size) is int instead of sequence like (h, w), a square crop (size, size) is
made. If provided a tuple or list of length 1, it will be interpreted as (size[0], size[0]). made. If provided a sequence of length 1, it will be interpreted as (size[0], size[0]).
""" """
def __init__(self, size): def __init__(self, size):
...@@ -317,21 +318,22 @@ class CenterCrop(torch.nn.Module): ...@@ -317,21 +318,22 @@ class CenterCrop(torch.nn.Module):
class Pad(torch.nn.Module): class Pad(torch.nn.Module):
"""Pad the given image on all sides with the given "pad" value. """Pad the given image on all sides with the given "pad" value.
The image can be a PIL Image or a torch Tensor, in which case it is expected If the image is a torch Tensor, it is expected
to have [..., H, W] shape, where ... means an arbitrary number of leading dimensions to have [..., H, W] shape, where ... means an arbitrary number of leading dimensions
Args: Args:
padding (int or tuple or list): Padding on each border. If a single int is provided this padding (int or sequence): Padding on each border. If a single int is provided this
is used to pad all borders. If tuple of length 2 is provided this is the padding is used to pad all borders. If sequence of length 2 is provided this is the padding
on left/right and top/bottom respectively. If a tuple of length 4 is provided on left/right and top/bottom respectively. If a sequence of length 4 is provided
this is the padding for the left, top, right and bottom borders respectively. this is the padding for the left, top, right and bottom borders respectively.
In torchscript mode padding as single int is not supported, use a tuple or In torchscript mode padding as single int is not supported, use a sequence of length 1: ``[padding, ]``.
list of length 1: ``[padding, ]``. fill (number or str or tuple): Pixel fill value for constant fill. Default is 0. If a tuple of
fill (int or tuple): Pixel fill value for constant fill. Default is 0. If a tuple of
length 3, it is used to fill R, G, B channels respectively. length 3, it is used to fill R, G, B channels respectively.
This value is only used when the padding_mode is constant This value is only used when the padding_mode is constant.
Only number is supported for torch Tensor.
Only int or str or tuple value is supported for PIL Image.
padding_mode (str): Type of padding. Should be: constant, edge, reflect or symmetric. padding_mode (str): Type of padding. Should be: constant, edge, reflect or symmetric.
Default is constant. Mode symmetric is not yet supported for Tensor inputs. Default is constant.
- constant: pads with a constant value, this value is specified with fill - constant: pads with a constant value, this value is specified with fill
...@@ -405,7 +407,7 @@ class RandomTransforms: ...@@ -405,7 +407,7 @@ class RandomTransforms:
"""Base class for a list of transformations with randomness """Base class for a list of transformations with randomness
Args: Args:
transforms (list or tuple): list of transformations transforms (sequence): list of transformations
""" """
def __init__(self, transforms): def __init__(self, transforms):
...@@ -441,7 +443,7 @@ class RandomApply(torch.nn.Module): ...@@ -441,7 +443,7 @@ class RandomApply(torch.nn.Module):
`lambda` functions or ``PIL.Image``. `lambda` functions or ``PIL.Image``.
Args: Args:
transforms (list or tuple or torch.nn.Module): list of transformations transforms (sequence or torch.nn.Module): list of transformations
p (float): probability p (float): probability
""" """
...@@ -488,29 +490,28 @@ class RandomChoice(RandomTransforms): ...@@ -488,29 +490,28 @@ class RandomChoice(RandomTransforms):
class RandomCrop(torch.nn.Module): class RandomCrop(torch.nn.Module):
"""Crop the given image at a random location. """Crop the given image at a random location.
The image can be a PIL Image or a Tensor, in which case it is expected If the image is a torch Tensor, it is expected
to have [..., H, W] shape, where ... means an arbitrary number of leading to have [..., H, W] shape, where ... means an arbitrary number of leading dimensions
dimensions
Args: Args:
size (sequence or int): Desired output size of the crop. If size is an size (sequence or int): Desired output size of the crop. If size is an
int instead of sequence like (h, w), a square crop (size, size) is int instead of sequence like (h, w), a square crop (size, size) is
made. If provided a tuple or list of length 1, it will be interpreted as (size[0], size[0]). made. If provided a sequence of length 1, it will be interpreted as (size[0], size[0]).
padding (int or sequence, optional): Optional padding on each border padding (int or sequence, optional): Optional padding on each border
of the image. Default is None. If a single int is provided this of the image. Default is None. If a single int is provided this
is used to pad all borders. If tuple of length 2 is provided this is the padding is used to pad all borders. If sequence of length 2 is provided this is the padding
on left/right and top/bottom respectively. If a tuple of length 4 is provided on left/right and top/bottom respectively. If a sequence of length 4 is provided
this is the padding for the left, top, right and bottom borders respectively. this is the padding for the left, top, right and bottom borders respectively.
In torchscript mode padding as single int is not supported, use a tuple or In torchscript mode padding as single int is not supported, use a sequence of length 1: ``[padding, ]``.
list of length 1: ``[padding, ]``.
pad_if_needed (boolean): It will pad the image if smaller than the pad_if_needed (boolean): It will pad the image if smaller than the
desired size to avoid raising an exception. Since cropping is done desired size to avoid raising an exception. Since cropping is done
after padding, the padding seems to be done at a random offset. after padding, the padding seems to be done at a random offset.
fill (int or tuple): Pixel fill value for constant fill. Default is 0. If a tuple of fill (number or str or tuple): Pixel fill value for constant fill. Default is 0. If a tuple of
length 3, it is used to fill R, G, B channels respectively. length 3, it is used to fill R, G, B channels respectively.
This value is only used when the padding_mode is constant This value is only used when the padding_mode is constant.
Only number is supported for torch Tensor.
Only int or str or tuple value is supported for PIL Image.
padding_mode (str): Type of padding. Should be: constant, edge, reflect or symmetric. Default is constant. padding_mode (str): Type of padding. Should be: constant, edge, reflect or symmetric. Default is constant.
Mode symmetric is not yet supported for Tensor inputs.
- constant: pads with a constant value, this value is specified with fill - constant: pads with a constant value, this value is specified with fill
...@@ -597,7 +598,7 @@ class RandomCrop(torch.nn.Module): ...@@ -597,7 +598,7 @@ class RandomCrop(torch.nn.Module):
class RandomHorizontalFlip(torch.nn.Module): class RandomHorizontalFlip(torch.nn.Module):
"""Horizontally flip the given image randomly with a given probability. """Horizontally flip the given image randomly with a given probability.
The image can be a PIL Image or a torch Tensor, in which case it is expected If the image is a torch Tensor, it is expected
to have [..., H, W] shape, where ... means an arbitrary number of leading to have [..., H, W] shape, where ... means an arbitrary number of leading
dimensions dimensions
...@@ -627,7 +628,7 @@ class RandomHorizontalFlip(torch.nn.Module): ...@@ -627,7 +628,7 @@ class RandomHorizontalFlip(torch.nn.Module):
class RandomVerticalFlip(torch.nn.Module): class RandomVerticalFlip(torch.nn.Module):
"""Vertically flip the given image randomly with a given probability. """Vertically flip the given image randomly with a given probability.
The image can be a PIL Image or a torch Tensor, in which case it is expected If the image is a torch Tensor, it is expected
to have [..., H, W] shape, where ... means an arbitrary number of leading to have [..., H, W] shape, where ... means an arbitrary number of leading
dimensions dimensions
...@@ -657,7 +658,7 @@ class RandomVerticalFlip(torch.nn.Module): ...@@ -657,7 +658,7 @@ class RandomVerticalFlip(torch.nn.Module):
class RandomPerspective(torch.nn.Module): class RandomPerspective(torch.nn.Module):
"""Performs a random perspective transformation of the given image with a given probability. """Performs a random perspective transformation of the given image with a given probability.
The image can be a PIL Image or a Tensor, in which case it is expected If the image is a torch Tensor, it is expected
to have [..., H, W] shape, where ... means an arbitrary number of leading dimensions. to have [..., H, W] shape, where ... means an arbitrary number of leading dimensions.
Args: Args:
...@@ -668,9 +669,8 @@ class RandomPerspective(torch.nn.Module): ...@@ -668,9 +669,8 @@ class RandomPerspective(torch.nn.Module):
:class:`torchvision.transforms.InterpolationMode`. Default is ``InterpolationMode.BILINEAR``. :class:`torchvision.transforms.InterpolationMode`. Default is ``InterpolationMode.BILINEAR``.
If input is Tensor, only ``InterpolationMode.NEAREST``, ``InterpolationMode.BILINEAR`` are supported. If input is Tensor, only ``InterpolationMode.NEAREST``, ``InterpolationMode.BILINEAR`` are supported.
For backward compatibility integer values (e.g. ``PIL.Image.NEAREST``) are still acceptable. For backward compatibility integer values (e.g. ``PIL.Image.NEAREST``) are still acceptable.
fill (sequence or int or float, optional): Pixel fill value for the area outside the transformed fill (sequence or number, optional): Pixel fill value for the area outside the transformed
image. If int or float, the value is used for all bands respectively. image. If given a number, the value is used for all bands.
This option is supported for PIL image and Tensor inputs.
If input is PIL Image, the option is only available for ``Pillow>=5.0.0``. If input is PIL Image, the option is only available for ``Pillow>=5.0.0``.
""" """
...@@ -753,7 +753,7 @@ class RandomPerspective(torch.nn.Module): ...@@ -753,7 +753,7 @@ class RandomPerspective(torch.nn.Module):
class RandomResizedCrop(torch.nn.Module): class RandomResizedCrop(torch.nn.Module):
"""Crop the given image to random size and aspect ratio. """Crop the given image to random size and aspect ratio.
The image can be a PIL Image or a Tensor, in which case it is expected If the image is a torch Tensor, it is expected
to have [..., H, W] shape, where ... means an arbitrary number of leading dimensions to have [..., H, W] shape, where ... means an arbitrary number of leading dimensions
A crop of random size (default: of 0.08 to 1.0) of the original size and a random A crop of random size (default: of 0.08 to 1.0) of the original size and a random
...@@ -764,7 +764,8 @@ class RandomResizedCrop(torch.nn.Module): ...@@ -764,7 +764,8 @@ class RandomResizedCrop(torch.nn.Module):
Args: Args:
size (int or sequence): expected output size of each edge. If size is an size (int or sequence): expected output size of each edge. If size is an
int instead of sequence like (h, w), a square output size ``(size, size)`` is int instead of sequence like (h, w), a square output size ``(size, size)`` is
made. If provided a tuple or list of length 1, it will be interpreted as (size[0], size[0]). made. If provided a sequence of length 1, it will be interpreted as (size[0], size[0]).
In torchscript mode size as single int is not supported, use a sequence of length 1: ``[size, ]``.
scale (tuple of float): scale range of the cropped image before resizing, relative to the original image. scale (tuple of float): scale range of the cropped image before resizing, relative to the original image.
ratio (tuple of float): aspect ratio range of the cropped image before resizing. ratio (tuple of float): aspect ratio range of the cropped image before resizing.
interpolation (InterpolationMode): Desired interpolation enum defined by interpolation (InterpolationMode): Desired interpolation enum defined by
...@@ -878,7 +879,7 @@ class RandomSizedCrop(RandomResizedCrop): ...@@ -878,7 +879,7 @@ class RandomSizedCrop(RandomResizedCrop):
class FiveCrop(torch.nn.Module): class FiveCrop(torch.nn.Module):
"""Crop the given image into four corners and the central crop. """Crop the given image into four corners and the central crop.
The image can be a PIL Image or a Tensor, in which case it is expected If the image is a torch Tensor, it is expected
to have [..., H, W] shape, where ... means an arbitrary number of leading to have [..., H, W] shape, where ... means an arbitrary number of leading
dimensions dimensions
...@@ -890,7 +891,7 @@ class FiveCrop(torch.nn.Module): ...@@ -890,7 +891,7 @@ class FiveCrop(torch.nn.Module):
Args: Args:
size (sequence or int): Desired output size of the crop. If size is an ``int`` size (sequence or int): Desired output size of the crop. If size is an ``int``
instead of sequence like (h, w), a square crop of size (size, size) is made. instead of sequence like (h, w), a square crop of size (size, size) is made.
If provided a tuple or list of length 1, it will be interpreted as (size[0], size[0]). If provided a sequence of length 1, it will be interpreted as (size[0], size[0]).
Example: Example:
>>> transform = Compose([ >>> transform = Compose([
...@@ -925,7 +926,7 @@ class FiveCrop(torch.nn.Module): ...@@ -925,7 +926,7 @@ class FiveCrop(torch.nn.Module):
class TenCrop(torch.nn.Module): class TenCrop(torch.nn.Module):
"""Crop the given image into four corners and the central crop plus the flipped version of """Crop the given image into four corners and the central crop plus the flipped version of
these (horizontal flipping is used by default). these (horizontal flipping is used by default).
The image can be a PIL Image or a Tensor, in which case it is expected If the image is a torch Tensor, it is expected
to have [..., H, W] shape, where ... means an arbitrary number of leading to have [..., H, W] shape, where ... means an arbitrary number of leading
dimensions dimensions
...@@ -937,7 +938,7 @@ class TenCrop(torch.nn.Module): ...@@ -937,7 +938,7 @@ class TenCrop(torch.nn.Module):
Args: Args:
size (sequence or int): Desired output size of the crop. If size is an size (sequence or int): Desired output size of the crop. If size is an
int instead of sequence like (h, w), a square crop (size, size) is int instead of sequence like (h, w), a square crop (size, size) is
made. If provided a tuple or list of length 1, it will be interpreted as (size[0], size[0]). made. If provided a sequence of length 1, it will be interpreted as (size[0], size[0]).
vertical_flip (bool): Use vertical flipping instead of horizontal vertical_flip (bool): Use vertical flipping instead of horizontal
Example: Example:
...@@ -974,6 +975,7 @@ class TenCrop(torch.nn.Module): ...@@ -974,6 +975,7 @@ class TenCrop(torch.nn.Module):
class LinearTransformation(torch.nn.Module): class LinearTransformation(torch.nn.Module):
"""Transform a tensor image with a square transformation matrix and a mean_vector computed """Transform a tensor image with a square transformation matrix and a mean_vector computed
offline. offline.
This transform does not support PIL Image.
Given transformation_matrix and mean_vector, will flatten the torch.*Tensor and Given transformation_matrix and mean_vector, will flatten the torch.*Tensor and
subtract mean_vector from it which is then followed by computing the dot subtract mean_vector from it which is then followed by computing the dot
product with the transformation matrix and then reshaping the tensor to its product with the transformation matrix and then reshaping the tensor to its
...@@ -1040,6 +1042,8 @@ class LinearTransformation(torch.nn.Module): ...@@ -1040,6 +1042,8 @@ class LinearTransformation(torch.nn.Module):
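The flatten / subtract / matrix-multiply / reshape pipeline above, sketched with an identity matrix (a real use case would pass a whitening matrix computed offline):

import torch
from torchvision import transforms

c, h, w = 3, 4, 4
d = c * h * w
t = transforms.LinearTransformation(torch.eye(d), torch.zeros(d))
out = t(torch.rand(c, h, w))  # identity matrix and zero mean leave the image unchanged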
class ColorJitter(torch.nn.Module): class ColorJitter(torch.nn.Module):
"""Randomly change the brightness, contrast, saturation and hue of an image. """Randomly change the brightness, contrast, saturation and hue of an image.
If the image is a torch Tensor, it is expected
to have [..., H, W] shape, where ... means an arbitrary number of leading dimensions.
Args: Args:
brightness (float or tuple of float (min, max)): How much to jitter brightness. brightness (float or tuple of float (min, max)): How much to jitter brightness.
...@@ -1149,11 +1153,11 @@ class ColorJitter(torch.nn.Module): ...@@ -1149,11 +1153,11 @@ class ColorJitter(torch.nn.Module):
class RandomRotation(torch.nn.Module): class RandomRotation(torch.nn.Module):
"""Rotate the image by angle. """Rotate the image by angle.
The image can be a PIL Image or a Tensor, in which case it is expected If the image is a torch Tensor, it is expected
to have [..., H, W] shape, where ... means an arbitrary number of leading dimensions. to have [..., H, W] shape, where ... means an arbitrary number of leading dimensions.
Args: Args:
degrees (sequence or float or int): Range of degrees to select from. degrees (sequence or number): Range of degrees to select from.
If degrees is a number instead of sequence like (min, max), the range of degrees If degrees is a number instead of sequence like (min, max), the range of degrees
will be (-degrees, +degrees). will be (-degrees, +degrees).
interpolation (InterpolationMode): Desired interpolation enum defined by interpolation (InterpolationMode): Desired interpolation enum defined by
...@@ -1164,11 +1168,10 @@ class RandomRotation(torch.nn.Module): ...@@ -1164,11 +1168,10 @@ class RandomRotation(torch.nn.Module):
If true, expands the output to make it large enough to hold the entire rotated image. If true, expands the output to make it large enough to hold the entire rotated image.
If false or omitted, make the output image the same size as the input image. If false or omitted, make the output image the same size as the input image.
Note that the expand flag assumes rotation around the center and no translation. Note that the expand flag assumes rotation around the center and no translation.
center (list or tuple, optional): Optional center of rotation, (x, y). Origin is the upper left corner. center (sequence, optional): Optional center of rotation, (x, y). Origin is the upper left corner.
Default is the center of the image. Default is the center of the image.
fill (sequence or int or float, optional): Pixel fill value for the area outside the rotated fill (sequence or number, optional): Pixel fill value for the area outside the rotated
image. If int or float, the value is used for all bands respectively. image. If given a number, the value is used for all bands.
This option is supported for PIL image and Tensor inputs.
If input is PIL Image, the option is only available for ``Pillow>=5.2.0``. If input is PIL Image, the option is only available for ``Pillow>=5.2.0``.
resample (int, optional): deprecated argument and will be removed in v0.10.0. resample (int, optional): deprecated argument and will be removed in v0.10.0.
Please use the ``interpolation`` argument instead. Please use the ``interpolation`` argument instead.
...@@ -1249,11 +1252,11 @@ class RandomRotation(torch.nn.Module): ...@@ -1249,11 +1252,11 @@ class RandomRotation(torch.nn.Module):
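Typical construction with the arguments documented above (values illustrative):

from torchvision import transforms
from torchvision.transforms import InterpolationMode

t = transforms.RandomRotation(degrees=30, interpolation=InterpolationMode.BILINEAR,
                              expand=False, fill=0)
# t(img) rotates a PIL Image or tensor by an angle sampled uniformly from [-30, 30].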
class RandomAffine(torch.nn.Module): class RandomAffine(torch.nn.Module):
"""Random affine transformation of the image keeping center invariant. """Random affine transformation of the image keeping center invariant.
The image can be a PIL Image or a Tensor, in which case it is expected If the image is a torch Tensor, it is expected
to have [..., H, W] shape, where ... means an arbitrary number of leading dimensions. to have [..., H, W] shape, where ... means an arbitrary number of leading dimensions.
Args: Args:
degrees (sequence or float or int): Range of degrees to select from. degrees (sequence or number): Range of degrees to select from.
If degrees is a number instead of sequence like (min, max), the range of degrees If degrees is a number instead of sequence like (min, max), the range of degrees
will be (-degrees, +degrees). Set to 0 to deactivate rotations. will be (-degrees, +degrees). Set to 0 to deactivate rotations.
translate (tuple, optional): tuple of maximum absolute fraction for horizontal translate (tuple, optional): tuple of maximum absolute fraction for horizontal
...@@ -1262,21 +1265,20 @@ class RandomAffine(torch.nn.Module): ...@@ -1262,21 +1265,20 @@ class RandomAffine(torch.nn.Module):
randomly sampled in the range -img_height * b < dy < img_height * b. Will not translate by default. randomly sampled in the range -img_height * b < dy < img_height * b. Will not translate by default.
scale (tuple, optional): scaling factor interval, e.g (a, b), then scale is scale (tuple, optional): scaling factor interval, e.g (a, b), then scale is
randomly sampled from the range a <= scale <= b. Will keep original scale by default. randomly sampled from the range a <= scale <= b. Will keep original scale by default.
shear (sequence or float or int, optional): Range of degrees to select from. shear (sequence or number, optional): Range of degrees to select from.
If shear is a number, a shear parallel to the x axis in the range (-shear, +shear) If shear is a number, a shear parallel to the x axis in the range (-shear, +shear)
will be applied. Else if shear is a tuple or list of 2 values a shear parallel to the x axis in the will be applied. Else if shear is a sequence of 2 values a shear parallel to the x axis in the
range (shear[0], shear[1]) will be applied. Else if shear is a tuple or list of 4 values, range (shear[0], shear[1]) will be applied. Else if shear is a sequence of 4 values,
a x-axis shear in (shear[0], shear[1]) and y-axis shear in (shear[2], shear[3]) will be applied. a x-axis shear in (shear[0], shear[1]) and y-axis shear in (shear[2], shear[3]) will be applied.
Will not apply shear by default. Will not apply shear by default.
interpolation (InterpolationMode): Desired interpolation enum defined by interpolation (InterpolationMode): Desired interpolation enum defined by
:class:`torchvision.transforms.InterpolationMode`. Default is ``InterpolationMode.NEAREST``. :class:`torchvision.transforms.InterpolationMode`. Default is ``InterpolationMode.NEAREST``.
If input is Tensor, only ``InterpolationMode.NEAREST``, ``InterpolationMode.BILINEAR`` are supported. If input is Tensor, only ``InterpolationMode.NEAREST``, ``InterpolationMode.BILINEAR`` are supported.
For backward compatibility integer values (e.g. ``PIL.Image.NEAREST``) are still acceptable. For backward compatibility integer values (e.g. ``PIL.Image.NEAREST``) are still acceptable.
fill (sequence or int or float, optional): Pixel fill value for the area outside the transformed fill (sequence or number, optional): Pixel fill value for the area outside the transformed
image. If int or float, the value is used for all bands respectively. image. If given a number, the value is used for all bands.
This option is supported for PIL image and Tensor inputs.
If input is PIL Image, the option is only available for ``Pillow>=5.0.0``. If input is PIL Image, the option is only available for ``Pillow>=5.0.0``.
fillcolor (sequence or int or float, optional): deprecated argument and will be removed in v0.10.0. fillcolor (sequence or number, optional): deprecated argument and will be removed in v0.10.0.
Please use the ``fill`` argument instead. Please use the ``fill`` argument instead.
resample (int, optional): deprecated argument and will be removed in v0.10.0. resample (int, optional): deprecated argument and will be removed in v0.10.0.
Please use the ``interpolation`` argument instead. Please use the ``interpolation`` argument instead.
...@@ -1412,9 +1414,8 @@ class RandomAffine(torch.nn.Module): ...@@ -1412,9 +1414,8 @@ class RandomAffine(torch.nn.Module):
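One sampled transform per call, combining the parameter ranges above (values illustrative):

from torchvision import transforms

t = transforms.RandomAffine(degrees=10, translate=(0.1, 0.1),
                            scale=(0.9, 1.1), shear=(-5.0, 5.0), fill=0)
# Each call draws an angle in [-10, 10], dx/dy up to 10% of width/height,
# a scale in [0.9, 1.1] and an x-axis shear in [-5, 5] degrees.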
class Grayscale(torch.nn.Module): class Grayscale(torch.nn.Module):
"""Convert image to grayscale. """Convert image to grayscale.
The image can be a PIL Image or a Tensor, in which case it is expected If the image is a torch Tensor, it is expected
to have [..., 3, H, W] shape, where ... means an arbitrary number of leading to have [..., 3, H, W] shape, where ... means an arbitrary number of leading dimensions
dimensions
Args: Args:
num_output_channels (int): (1 or 3) number of channels desired for output image num_output_channels (int): (1 or 3) number of channels desired for output image
...@@ -1446,9 +1447,8 @@ class Grayscale(torch.nn.Module): ...@@ -1446,9 +1447,8 @@ class Grayscale(torch.nn.Module):
class RandomGrayscale(torch.nn.Module): class RandomGrayscale(torch.nn.Module):
"""Randomly convert image to grayscale with a probability of p (default 0.1). """Randomly convert image to grayscale with a probability of p (default 0.1).
The image can be a PIL Image or a Tensor, in which case it is expected If the image is a torch Tensor, it is expected
to have [..., 3, H, W] shape, where ... means an arbitrary number of leading to have [..., 3, H, W] shape, where ... means an arbitrary number of leading dimensions
dimensions
Args: Args:
p (float): probability that image should be converted to grayscale. p (float): probability that image should be converted to grayscale.
@@ -1483,7 +1483,8 @@ class RandomGrayscale(torch.nn.Module):
class RandomErasing(torch.nn.Module):
    """Randomly selects a rectangle region in a torch Tensor image and erases its pixels.
    This transform does not support PIL Image.
    'Random Erasing Data Augmentation' by Zhong et al. See https://arxiv.org/abs/1708.04896

    Args:
@@ -1539,8 +1540,8 @@ class RandomErasing(torch.nn.Module):
    Args:
        img (Tensor): Tensor image to be erased.
        scale (sequence): range of proportion of erased area against input image.
        ratio (sequence): range of aspect ratio of erased area.
        value (list, optional): erasing value. If None, it is interpreted as "random"
            (erasing each pixel with random values). If ``len(value)`` is 1, it is interpreted as a number,
            i.e. ``value[0]``.
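Since RandomErasing is tensor-only, it typically sits after ``ToTensor`` in a pipeline. A hedged sketch with arbitrary parameter values:

```python
import torch
from torchvision import transforms

# Erase a random rectangle (2%-33% of the area, aspect ratio 0.3-3.3) with zeros.
eraser = transforms.RandomErasing(p=0.5, scale=(0.02, 0.33), ratio=(0.3, 3.3), value=0)

img = torch.rand(3, 224, 224)  # float tensor image in [0, 1]
out = eraser(img)  # with probability 0.5, one rectangle is filled with 0
```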
@@ -1605,9 +1606,8 @@ class RandomErasing(torch.nn.Module):
class GaussianBlur(torch.nn.Module):
    """Blurs image with randomly chosen Gaussian blur.
    If the image is a torch Tensor, it is expected
    to have [..., C, H, W] shape, where ... means an arbitrary number of leading dimensions.

    Args:
        kernel_size (int or sequence): Size of the Gaussian kernel.
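For illustration, a short GaussianBlur sketch (kernel size and sigma range are arbitrary choices):

```python
import torch
from torchvision import transforms

# The kernel size is fixed; sigma is sampled uniformly from the given range
# each time the transform is applied.
blur = transforms.GaussianBlur(kernel_size=5, sigma=(0.1, 2.0))

img = torch.rand(3, 128, 128)
out = blur(img)  # blurred with a random sigma drawn from [0.1, 2.0]
```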
@@ -1704,9 +1704,9 @@ def _setup_angle(x, name, req_sizes=(2, )):
class RandomInvert(torch.nn.Module):
    """Inverts the colors of the given image randomly with a given probability.
    If img is a Tensor, it is expected to be in [..., 1 or 3, H, W] format,
    where ... means it can have an arbitrary number of leading dimensions.
    If img is a PIL Image, it is expected to be in mode "L" or "RGB".

    Args:
        p (float): probability of the image being color inverted. Default value is 0.5.
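A sketch of the invert transform, assuming the v0.9-era API where ``RandomInvert`` is available:

```python
import torch
from torchvision import transforms

inverter = transforms.RandomInvert(p=0.5)
img = torch.randint(0, 256, (3, 32, 32), dtype=torch.uint8)
out = inverter(img)  # with probability 0.5, each pixel v becomes 255 - v
```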
@@ -1734,9 +1734,9 @@ class RandomInvert(torch.nn.Module):
class RandomPosterize(torch.nn.Module):
    """Posterize the image randomly with a given probability by reducing the
    number of bits for each color channel. If the image is a torch Tensor, it should be of type torch.uint8,
    and it is expected to have [..., 1 or 3, H, W] shape, where ... means an arbitrary number of leading dimensions.
    If img is a PIL Image, it is expected to be in mode "L" or "RGB".

    Args:
        bits (int): number of bits to keep for each channel (0-8)
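Under the same assumption, posterization might be used like this (``bits=4`` is an arbitrary choice):

```python
import torch
from torchvision import transforms

posterize = transforms.RandomPosterize(bits=4, p=0.5)
img = torch.randint(0, 256, (3, 32, 32), dtype=torch.uint8)  # must be torch.uint8
out = posterize(img)  # when it fires, only the top 4 bits of each channel survive
```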
@@ -1766,9 +1766,9 @@ class RandomPosterize(torch.nn.Module):
class RandomSolarize(torch.nn.Module):
    """Solarize the image randomly with a given probability by inverting all pixel
    values above a threshold. If img is a Tensor, it is expected to be in [..., 1 or 3, H, W] format,
    where ... means it can have an arbitrary number of leading dimensions.
    If img is a PIL Image, it is expected to be in mode "L" or "RGB".

    Args:
        threshold (float): all pixels equal to or above this value are inverted.
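Likewise for solarization; for a uint8 image the threshold lives on the 0-255 scale (the value 192 here is arbitrary):

```python
import torch
from torchvision import transforms

solarize = transforms.RandomSolarize(threshold=192, p=0.5)
img = torch.randint(0, 256, (3, 32, 32), dtype=torch.uint8)
out = solarize(img)  # pixels >= 192 are inverted when the transform fires
```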
@@ -1797,9 +1797,8 @@ class RandomSolarize(torch.nn.Module):
class RandomAdjustSharpness(torch.nn.Module):
    """Adjust the sharpness of the image randomly with a given probability. If the image is a torch Tensor,
    it is expected to have [..., 1 or 3, H, W] shape, where ... means an arbitrary number of leading dimensions.

    Args:
        sharpness_factor (float): How much to adjust the sharpness. Can be
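A sketch for sharpness adjustment (factor semantics follow ``adjust_sharpness``: 0 blurs, 1 is identity, 2 doubles sharpness):

```python
import torch
from torchvision import transforms

sharpen = transforms.RandomAdjustSharpness(sharpness_factor=2.0, p=0.5)
img = torch.rand(3, 64, 64)
out = sharpen(img)  # with probability 0.5, sharpness is doubled
```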
@@ -1831,9 +1830,9 @@ class RandomAdjustSharpness(torch.nn.Module):
class RandomAutocontrast(torch.nn.Module):
    """Autocontrast the pixels of the given image randomly with a given probability.
    If the image is a torch Tensor, it is expected
    to have [..., 1 or 3, H, W] shape, where ... means an arbitrary number of leading dimensions.
    If img is a PIL Image, it is expected to be in mode "L" or "RGB".

    Args:
        p (float): probability of the image being autocontrasted. Default value is 0.5.
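Autocontrast, sketched under the same API assumption:

```python
import torch
from torchvision import transforms

autocontrast = transforms.RandomAutocontrast(p=0.5)
img = torch.randint(0, 256, (3, 64, 64), dtype=torch.uint8)
# When applied, each channel is remapped so its darkest pixel maps to 0
# and its lightest to 255.
out = autocontrast(img)
```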
@@ -1861,9 +1860,9 @@ class RandomAutocontrast(torch.nn.Module):
class RandomEqualize(torch.nn.Module):
    """Equalize the histogram of the given image randomly with a given probability.
    If the image is a torch Tensor, it is expected
    to have [..., 1 or 3, H, W] shape, where ... means an arbitrary number of leading dimensions.
    If img is a PIL Image, it is expected to be in mode "P", "L" or "RGB".

    Args:
        p (float): probability of the image being equalized. Default value is 0.5.
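Finally, histogram equalization; note that for tensor inputs this era of the API expects torch.uint8 (an assumption worth verifying against your torchvision version):

```python
import torch
from torchvision import transforms

equalize = transforms.RandomEqualize(p=0.5)
img = torch.randint(0, 256, (3, 64, 64), dtype=torch.uint8)
out = equalize(img)  # when applied, each channel's histogram is flattened
```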