Unverified Commit 928b05ca authored by Nicolas Hug's avatar Nicolas Hug Committed by GitHub
Browse files

Added docs for v2 transforms (part 1) (#7297)


Co-authored-by: default avatarvfdev <vfdev.5@gmail.com>
Co-authored-by: default avatarPhilip Meier <github.pmeier@posteo.de>
parent d03b776a
...@@ -33,6 +33,8 @@ from tabulate import tabulate ...@@ -33,6 +33,8 @@ from tabulate import tabulate
sys.path.append(os.path.abspath(".")) sys.path.append(os.path.abspath("."))
torchvision.disable_beta_transforms_warning()
# -- General configuration ------------------------------------------------ # -- General configuration ------------------------------------------------
# Required version of sphinx is set from docs/requirements.txt # Required version of sphinx is set from docs/requirements.txt
......
...@@ -98,17 +98,29 @@ Geometry ...@@ -98,17 +98,29 @@ Geometry
:template: class.rst :template: class.rst
Resize Resize
v2.Resize
RandomCrop RandomCrop
v2.RandomCrop
RandomResizedCrop RandomResizedCrop
v2.RandomResizedCrop
CenterCrop CenterCrop
v2.CenterCrop
FiveCrop FiveCrop
v2.FiveCrop
TenCrop TenCrop
v2.TenCrop
Pad Pad
v2.Pad
RandomAffine RandomAffine
v2.RandomAffine
RandomPerspective RandomPerspective
v2.RandomPerspective
RandomRotation RandomRotation
v2.RandomRotation
RandomHorizontalFlip RandomHorizontalFlip
v2.RandomHorizontalFlip
RandomVerticalFlip RandomVerticalFlip
v2.RandomVerticalFlip
Color Color
----- -----
...@@ -118,15 +130,25 @@ Color ...@@ -118,15 +130,25 @@ Color
:template: class.rst :template: class.rst
ColorJitter ColorJitter
v2.ColorJitter
Grayscale Grayscale
v2.Grayscale
RandomGrayscale RandomGrayscale
v2.RandomGrayscale
GaussianBlur GaussianBlur
v2.GaussianBlur
RandomInvert RandomInvert
v2.RandomInvert
RandomPosterize RandomPosterize
v2.RandomPosterize
RandomSolarize RandomSolarize
v2.RandomSolarize
RandomAdjustSharpness RandomAdjustSharpness
v2.RandomAdjustSharpness
RandomAutocontrast RandomAutocontrast
v2.RandomAutocontrast
RandomEqualize RandomEqualize
v2.RandomEqualize
Composition Composition
----------- -----------
...@@ -136,9 +158,13 @@ Composition ...@@ -136,9 +158,13 @@ Composition
:template: class.rst :template: class.rst
Compose Compose
v2.Compose
RandomApply RandomApply
v2.RandomApply
RandomChoice RandomChoice
v2.RandomChoice
RandomOrder RandomOrder
v2.RandomOrder
Miscellaneous Miscellaneous
------------- -------------
...@@ -148,9 +174,13 @@ Miscellaneous ...@@ -148,9 +174,13 @@ Miscellaneous
:template: class.rst :template: class.rst
LinearTransformation LinearTransformation
v2.LinearTransformation
Normalize Normalize
v2.Normalize
RandomErasing RandomErasing
v2.RandomErasing
Lambda Lambda
v2.Lambda
.. _conversion_transforms: .. _conversion_transforms:
...@@ -162,9 +192,15 @@ Conversion ...@@ -162,9 +192,15 @@ Conversion
:template: class.rst :template: class.rst
ToPILImage ToPILImage
v2.ToPILImage
v2.ToImagePIL
ToTensor ToTensor
v2.ToTensor
PILToTensor PILToTensor
v2.PILToTensor
ConvertImageDtype ConvertImageDtype
v2.ConvertImageDtype
v2.ConvertDtype
Auto-Augmentation Auto-Augmentation
----------------- -----------------
...@@ -181,9 +217,13 @@ The new transform can be used standalone or mixed-and-matched with existing tran ...@@ -181,9 +217,13 @@ The new transform can be used standalone or mixed-and-matched with existing tran
AutoAugmentPolicy AutoAugmentPolicy
AutoAugment AutoAugment
v2.AutoAugment
RandAugment RandAugment
v2.RandAugment
TrivialAugmentWide TrivialAugmentWide
v2.TrivialAugmentWide
AugMix AugMix
v2.AugMix
.. _functional_transforms: .. _functional_transforms:
......
...@@ -13,6 +13,38 @@ from .utils import is_simple_tensor, query_chw ...@@ -13,6 +13,38 @@ from .utils import is_simple_tensor, query_chw
class RandomErasing(_RandomApplyTransform): class RandomErasing(_RandomApplyTransform):
"""[BETA] Randomly selects a rectangle region in the input image or video and erases its pixels.
.. betastatus:: RandomErasing transform
This transform does not support PIL Image.
'Random Erasing Data Augmentation' by Zhong et al. See https://arxiv.org/abs/1708.04896
Args:
p: probability that the random erasing operation will be performed.
scale: range of proportion of erased area against input image.
ratio: range of aspect ratio of erased area.
value: erasing value. Default is 0. If a single int, it is used to
erase all pixels. If a tuple of length 3, it is used to erase
R, G, B channels respectively.
If a str of 'random', each pixel is erased with random values.
inplace: boolean to make this transform inplace. Default set to False.
Returns:
Erased input.
Example:
>>> from torchvision.transforms import v2 as transforms
>>>
>>> transform = transforms.Compose([
>>> transforms.RandomHorizontalFlip(),
>>> transforms.PILToTensor(),
>>> transforms.ConvertImageDtype(torch.float),
>>> transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)),
>>> transforms.RandomErasing(),
>>> ])
"""
_v1_transform_cls = _transforms.RandomErasing _v1_transform_cls = _transforms.RandomErasing
def _extract_params_for_v1_transform(self) -> Dict[str, Any]: def _extract_params_for_v1_transform(self) -> Dict[str, Any]:
......
...@@ -162,6 +162,24 @@ class _AutoAugmentBase(Transform): ...@@ -162,6 +162,24 @@ class _AutoAugmentBase(Transform):
class AutoAugment(_AutoAugmentBase): class AutoAugment(_AutoAugmentBase):
r"""[BETA] AutoAugment data augmentation method based on
`"AutoAugment: Learning Augmentation Strategies from Data" <https://arxiv.org/pdf/1805.09501.pdf>`_.
.. betastatus:: AutoAugment transform
If the image is torch Tensor, it should be of type torch.uint8, and it is expected
to have [..., 1 or 3, H, W] shape, where ... means an arbitrary number of leading dimensions.
If img is PIL Image, it is expected to be in mode "L" or "RGB".
Args:
policy (AutoAugmentPolicy): Desired policy enum defined by
:class:`torchvision.transforms.autoaugment.AutoAugmentPolicy`. Default is ``AutoAugmentPolicy.IMAGENET``.
interpolation (InterpolationMode): Desired interpolation enum defined by
:class:`torchvision.transforms.InterpolationMode`. Default is ``InterpolationMode.NEAREST``.
If input is Tensor, only ``InterpolationMode.NEAREST``, ``InterpolationMode.BILINEAR`` are supported.
fill (sequence or number, optional): Pixel fill value for the area outside the transformed
image. If given a number, the value is used for all bands respectively.
"""
_v1_transform_cls = _transforms.AutoAugment _v1_transform_cls = _transforms.AutoAugment
_AUGMENTATION_SPACE = { _AUGMENTATION_SPACE = {
...@@ -318,6 +336,27 @@ class AutoAugment(_AutoAugmentBase): ...@@ -318,6 +336,27 @@ class AutoAugment(_AutoAugmentBase):
class RandAugment(_AutoAugmentBase): class RandAugment(_AutoAugmentBase):
r"""[BETA] RandAugment data augmentation method based on
`"RandAugment: Practical automated data augmentation with a reduced search space"
<https://arxiv.org/abs/1909.13719>`_.
.. betastatus:: RandAugment transform
If the image is torch Tensor, it should be of type torch.uint8, and it is expected
to have [..., 1 or 3, H, W] shape, where ... means an arbitrary number of leading dimensions.
If img is PIL Image, it is expected to be in mode "L" or "RGB".
Args:
num_ops (int): Number of augmentation transformations to apply sequentially.
magnitude (int): Magnitude for all the transformations.
num_magnitude_bins (int): The number of different magnitude values.
interpolation (InterpolationMode): Desired interpolation enum defined by
:class:`torchvision.transforms.InterpolationMode`. Default is ``InterpolationMode.NEAREST``.
If input is Tensor, only ``InterpolationMode.NEAREST``, ``InterpolationMode.BILINEAR`` are supported.
fill (sequence or number, optional): Pixel fill value for the area outside the transformed
image. If given a number, the value is used for all bands respectively.
"""
_v1_transform_cls = _transforms.RandAugment _v1_transform_cls = _transforms.RandAugment
_AUGMENTATION_SPACE = { _AUGMENTATION_SPACE = {
"Identity": (lambda num_bins, height, width: None, False), "Identity": (lambda num_bins, height, width: None, False),
...@@ -379,6 +418,24 @@ class RandAugment(_AutoAugmentBase): ...@@ -379,6 +418,24 @@ class RandAugment(_AutoAugmentBase):
class TrivialAugmentWide(_AutoAugmentBase): class TrivialAugmentWide(_AutoAugmentBase):
r"""[BETA] Dataset-independent data-augmentation with TrivialAugment Wide, as described in
`"TrivialAugment: Tuning-free Yet State-of-the-Art Data Augmentation" <https://arxiv.org/abs/2103.10158>`_.
.. betastatus:: TrivialAugmentWide transform
If the image is torch Tensor, it should be of type torch.uint8, and it is expected
to have [..., 1 or 3, H, W] shape, where ... means an arbitrary number of leading dimensions.
If img is PIL Image, it is expected to be in mode "L" or "RGB".
Args:
num_magnitude_bins (int): The number of different magnitude values.
interpolation (InterpolationMode): Desired interpolation enum defined by
:class:`torchvision.transforms.InterpolationMode`. Default is ``InterpolationMode.NEAREST``.
If input is Tensor, only ``InterpolationMode.NEAREST``, ``InterpolationMode.BILINEAR`` are supported.
fill (sequence or number, optional): Pixel fill value for the area outside the transformed
image. If given a number, the value is used for all bands respectively.
"""
_v1_transform_cls = _transforms.TrivialAugmentWide _v1_transform_cls = _transforms.TrivialAugmentWide
_AUGMENTATION_SPACE = { _AUGMENTATION_SPACE = {
"Identity": (lambda num_bins, height, width: None, False), "Identity": (lambda num_bins, height, width: None, False),
...@@ -430,6 +487,29 @@ class TrivialAugmentWide(_AutoAugmentBase): ...@@ -430,6 +487,29 @@ class TrivialAugmentWide(_AutoAugmentBase):
class AugMix(_AutoAugmentBase): class AugMix(_AutoAugmentBase):
r"""[BETA] AugMix data augmentation method based on
`"AugMix: A Simple Data Processing Method to Improve Robustness and Uncertainty" <https://arxiv.org/abs/1912.02781>`_.
.. betastatus:: AugMix transform
If the image is torch Tensor, it should be of type torch.uint8, and it is expected
to have [..., 1 or 3, H, W] shape, where ... means an arbitrary number of leading dimensions.
If img is PIL Image, it is expected to be in mode "L" or "RGB".
Args:
severity (int): The severity of base augmentation operators. Default is ``3``.
mixture_width (int): The number of augmentation chains. Default is ``3``.
chain_depth (int): The depth of augmentation chains. A negative value denotes stochastic depth sampled from the interval [1, 3].
Default is ``-1``.
alpha (float): The hyperparameter for the probability distributions. Default is ``1.0``.
all_ops (bool): Use all operations (including brightness, contrast, color and sharpness). Default is ``True``.
interpolation (InterpolationMode): Desired interpolation enum defined by
:class:`torchvision.transforms.InterpolationMode`. Default is ``InterpolationMode.NEAREST``.
If input is Tensor, only ``InterpolationMode.NEAREST``, ``InterpolationMode.BILINEAR`` are supported.
fill (sequence or number, optional): Pixel fill value for the area outside the transformed
image. If given a number, the value is used for all bands respectively.
"""
_v1_transform_cls = _transforms.AugMix _v1_transform_cls = _transforms.AugMix
_PARTIAL_AUGMENTATION_SPACE = { _PARTIAL_AUGMENTATION_SPACE = {
......
...@@ -11,6 +11,23 @@ from .utils import is_simple_tensor, query_chw ...@@ -11,6 +11,23 @@ from .utils import is_simple_tensor, query_chw
class Grayscale(Transform): class Grayscale(Transform):
"""[BETA] Convert images or videos to grayscale.
.. betastatus:: Grayscale transform
If the image is torch Tensor, it is expected
to have [..., 3, H, W] shape, where ... means an arbitrary number of leading dimensions.
Args:
num_output_channels (int): (1 or 3) number of channels desired for output image
Returns:
PIL Image or Tensor: Grayscale version of the input.
- If ``num_output_channels == 1`` : returned image is single channel
- If ``num_output_channels == 3`` : returned image is 3 channel with r == g == b
"""
_v1_transform_cls = _transforms.Grayscale _v1_transform_cls = _transforms.Grayscale
_transformed_types = ( _transformed_types = (
...@@ -29,6 +46,24 @@ class Grayscale(Transform): ...@@ -29,6 +46,24 @@ class Grayscale(Transform):
class RandomGrayscale(_RandomApplyTransform): class RandomGrayscale(_RandomApplyTransform):
"""[BETA] Randomly convert image to grayscale with a probability of p (default 0.1).
.. betastatus:: RandomGrayscale transform
If the image is torch Tensor, it is expected
to have [..., 3, H, W] shape, where ... means an arbitrary number of leading dimensions.
Args:
p (float): probability that image should be converted to grayscale.
Returns:
PIL Image or Tensor: Grayscale version of the input image with probability p and unchanged
with probability (1-p).
- If input image is 1 channel: grayscale version is 1 channel
- If input image is 3 channel: grayscale version is 3 channel with r == g == b
"""
_v1_transform_cls = _transforms.RandomGrayscale _v1_transform_cls = _transforms.RandomGrayscale
_transformed_types = ( _transformed_types = (
...@@ -50,6 +85,32 @@ class RandomGrayscale(_RandomApplyTransform): ...@@ -50,6 +85,32 @@ class RandomGrayscale(_RandomApplyTransform):
class ColorJitter(Transform): class ColorJitter(Transform):
"""[BETA] Randomly change the brightness, contrast, saturation and hue of an image.
.. betastatus:: ColorJitter transform
If the image is torch Tensor, it is expected
to have [..., 1 or 3, H, W] shape, where ... means an arbitrary number of leading dimensions.
If img is PIL Image, mode "1", "I", "F" and modes with transparency (alpha channel) are not supported.
Args:
brightness (float or tuple of float (min, max)): How much to jitter brightness.
brightness_factor is chosen uniformly from [max(0, 1 - brightness), 1 + brightness]
or the given [min, max]. Should be non-negative numbers.
contrast (float or tuple of float (min, max)): How much to jitter contrast.
contrast_factor is chosen uniformly from [max(0, 1 - contrast), 1 + contrast]
or the given [min, max]. Should be non-negative numbers.
saturation (float or tuple of float (min, max)): How much to jitter saturation.
saturation_factor is chosen uniformly from [max(0, 1 - saturation), 1 + saturation]
or the given [min, max]. Should be non-negative numbers.
hue (float or tuple of float (min, max)): How much to jitter hue.
hue_factor is chosen uniformly from [-hue, hue] or the given [min, max].
Should have 0<= hue <= 0.5 or -0.5 <= min <= max <= 0.5.
To jitter hue, the pixel values of the input image have to be non-negative for conversion to HSV space;
thus it does not work if you normalize your image to an interval with negative values,
or use an interpolation that generates negative values before using this function.
"""
_v1_transform_cls = _transforms.ColorJitter _v1_transform_cls = _transforms.ColorJitter
def _extract_params_for_v1_transform(self) -> Dict[str, Any]: def _extract_params_for_v1_transform(self) -> Dict[str, Any]:
...@@ -205,6 +266,18 @@ class RandomPhotometricDistort(Transform): ...@@ -205,6 +266,18 @@ class RandomPhotometricDistort(Transform):
class RandomEqualize(_RandomApplyTransform): class RandomEqualize(_RandomApplyTransform):
"""[BETA] Equalize the histogram of the given image randomly with a given probability.
.. betastatus:: RandomEqualize transform
If the image is torch Tensor, it is expected
to have [..., 1 or 3, H, W] shape, where ... means an arbitrary number of leading dimensions.
If img is PIL Image, it is expected to be in mode "P", "L" or "RGB".
Args:
p (float): probability of the image being equalized. Default value is 0.5
"""
_v1_transform_cls = _transforms.RandomEqualize _v1_transform_cls = _transforms.RandomEqualize
def _transform(self, inpt: Any, params: Dict[str, Any]) -> Any: def _transform(self, inpt: Any, params: Dict[str, Any]) -> Any:
...@@ -212,6 +285,18 @@ class RandomEqualize(_RandomApplyTransform): ...@@ -212,6 +285,18 @@ class RandomEqualize(_RandomApplyTransform):
class RandomInvert(_RandomApplyTransform): class RandomInvert(_RandomApplyTransform):
"""[BETA] Inverts the colors of the given image randomly with a given probability.
.. betastatus:: RandomInvert transform
If img is a Tensor, it is expected to be in [..., 1 or 3, H, W] format,
where ... means it can have an arbitrary number of leading dimensions.
If img is PIL Image, it is expected to be in mode "L" or "RGB".
Args:
p (float): probability of the image being color inverted. Default value is 0.5
"""
_v1_transform_cls = _transforms.RandomInvert _v1_transform_cls = _transforms.RandomInvert
def _transform(self, inpt: Any, params: Dict[str, Any]) -> Any: def _transform(self, inpt: Any, params: Dict[str, Any]) -> Any:
...@@ -219,6 +304,20 @@ class RandomInvert(_RandomApplyTransform): ...@@ -219,6 +304,20 @@ class RandomInvert(_RandomApplyTransform):
class RandomPosterize(_RandomApplyTransform): class RandomPosterize(_RandomApplyTransform):
"""[BETA] Posterize the image randomly with a given probability by reducing the
number of bits for each color channel.
.. betastatus:: RandomPosterize transform
If the image is torch Tensor, it should be of type torch.uint8,
and it is expected to have [..., 1 or 3, H, W] shape, where ... means an arbitrary number of leading dimensions.
If img is PIL Image, it is expected to be in mode "L" or "RGB".
Args:
bits (int): number of bits to keep for each channel (0-8)
p (float): probability of the image being posterized. Default value is 0.5
"""
_v1_transform_cls = _transforms.RandomPosterize _v1_transform_cls = _transforms.RandomPosterize
def __init__(self, bits: int, p: float = 0.5) -> None: def __init__(self, bits: int, p: float = 0.5) -> None:
...@@ -230,6 +329,20 @@ class RandomPosterize(_RandomApplyTransform): ...@@ -230,6 +329,20 @@ class RandomPosterize(_RandomApplyTransform):
class RandomSolarize(_RandomApplyTransform): class RandomSolarize(_RandomApplyTransform):
"""[BETA] Solarize the image randomly with a given probability by inverting all pixel
values above a threshold.
.. betastatus:: RandomSolarize transform
If img is a Tensor, it is expected to be in [..., 1 or 3, H, W] format,
where ... means it can have an arbitrary number of leading dimensions.
If img is PIL Image, it is expected to be in mode "L" or "RGB".
Args:
threshold (float): all pixels equal or above this value are inverted.
p (float): probability of the image being solarized. Default value is 0.5
"""
_v1_transform_cls = _transforms.RandomSolarize _v1_transform_cls = _transforms.RandomSolarize
def __init__(self, threshold: float, p: float = 0.5) -> None: def __init__(self, threshold: float, p: float = 0.5) -> None:
...@@ -241,6 +354,18 @@ class RandomSolarize(_RandomApplyTransform): ...@@ -241,6 +354,18 @@ class RandomSolarize(_RandomApplyTransform):
class RandomAutocontrast(_RandomApplyTransform): class RandomAutocontrast(_RandomApplyTransform):
"""[BETA] Autocontrast the pixels of the given image randomly with a given probability.
.. betastatus:: RandomAutocontrast transform
If the image is torch Tensor, it is expected
to have [..., 1 or 3, H, W] shape, where ... means an arbitrary number of leading dimensions.
If img is PIL Image, it is expected to be in mode "L" or "RGB".
Args:
p (float): probability of the image being autocontrasted. Default value is 0.5
"""
_v1_transform_cls = _transforms.RandomAutocontrast _v1_transform_cls = _transforms.RandomAutocontrast
def _transform(self, inpt: Any, params: Dict[str, Any]) -> Any: def _transform(self, inpt: Any, params: Dict[str, Any]) -> Any:
...@@ -248,6 +373,20 @@ class RandomAutocontrast(_RandomApplyTransform): ...@@ -248,6 +373,20 @@ class RandomAutocontrast(_RandomApplyTransform):
class RandomAdjustSharpness(_RandomApplyTransform): class RandomAdjustSharpness(_RandomApplyTransform):
"""[BETA] Adjust the sharpness of the image randomly with a given probability.
.. betastatus:: RandomAdjustSharpness transform
If the image is torch Tensor,
it is expected to have [..., 1 or 3, H, W] shape, where ... means an arbitrary number of leading dimensions.
Args:
sharpness_factor (float): How much to adjust the sharpness. Can be
any non-negative number. 0 gives a blurred image, 1 gives the
original image while 2 increases the sharpness by a factor of 2.
p (float): probability of the image being sharpened. Default value is 0.5
"""
_v1_transform_cls = _transforms.RandomAdjustSharpness _v1_transform_cls = _transforms.RandomAdjustSharpness
def __init__(self, sharpness_factor: float, p: float = 0.5) -> None: def __init__(self, sharpness_factor: float, p: float = 0.5) -> None:
......
...@@ -9,6 +9,37 @@ from torchvision.transforms.v2 import Transform ...@@ -9,6 +9,37 @@ from torchvision.transforms.v2 import Transform
class Compose(Transform): class Compose(Transform):
"""[BETA] Composes several transforms together.
.. betastatus:: Compose transform
This transform does not support torchscript.
Please, see the note below.
Args:
transforms (list of ``Transform`` objects): list of transforms to compose.
Example:
>>> transforms.Compose([
>>> transforms.CenterCrop(10),
>>> transforms.PILToTensor(),
>>> transforms.ConvertImageDtype(torch.float),
>>> ])
.. note::
In order to script the transformations, please use ``torch.nn.Sequential`` as below.
>>> transforms = torch.nn.Sequential(
>>> transforms.CenterCrop(10),
>>> transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)),
>>> )
>>> scripted_transforms = torch.jit.script(transforms)
Make sure to use only scriptable transformations, i.e. ones that work with ``torch.Tensor`` and do not require
`lambda` functions or ``PIL.Image``.
"""
def __init__(self, transforms: Sequence[Callable]) -> None: def __init__(self, transforms: Sequence[Callable]) -> None:
super().__init__() super().__init__()
if not isinstance(transforms, Sequence): if not isinstance(transforms, Sequence):
...@@ -29,6 +60,27 @@ class Compose(Transform): ...@@ -29,6 +60,27 @@ class Compose(Transform):
class RandomApply(Transform): class RandomApply(Transform):
"""[BETA] Apply randomly a list of transformations with a given probability.
.. betastatus:: RandomApply transform
.. note::
In order to script the transformation, please use ``torch.nn.ModuleList`` as input instead of list/tuple of
transforms as shown below:
>>> transforms = transforms.RandomApply(torch.nn.ModuleList([
>>> transforms.ColorJitter(),
>>> ]), p=0.3)
>>> scripted_transforms = torch.jit.script(transforms)
Make sure to use only scriptable transformations, i.e. ones that work with ``torch.Tensor`` and do not require
`lambda` functions or ``PIL.Image``.
Args:
transforms (sequence or torch.nn.Module): list of transformations
p (float): probability
"""
_v1_transform_cls = _transforms.RandomApply _v1_transform_cls = _transforms.RandomApply
def __init__(self, transforms: Union[Sequence[Callable], nn.ModuleList], p: float = 0.5) -> None: def __init__(self, transforms: Union[Sequence[Callable], nn.ModuleList], p: float = 0.5) -> None:
...@@ -63,6 +115,12 @@ class RandomApply(Transform): ...@@ -63,6 +115,12 @@ class RandomApply(Transform):
class RandomChoice(Transform): class RandomChoice(Transform):
"""[BETA] Apply single transformation randomly picked from a list.
.. betastatus:: RandomChoice transform
This transform does not support torchscript."""
def __init__( def __init__(
self, self,
transforms: Sequence[Callable], transforms: Sequence[Callable],
...@@ -99,6 +157,13 @@ class RandomChoice(Transform): ...@@ -99,6 +157,13 @@ class RandomChoice(Transform):
class RandomOrder(Transform): class RandomOrder(Transform):
"""[BETA] Apply a list of transformations in a random order.
.. betastatus:: RandomOrder transform
This transform does not support torchscript.
"""
def __init__(self, transforms: Sequence[Callable]) -> None: def __init__(self, transforms: Sequence[Callable]) -> None:
if not isinstance(transforms, Sequence): if not isinstance(transforms, Sequence):
raise TypeError("Argument transforms should be a sequence of callables") raise TypeError("Argument transforms should be a sequence of callables")
......
...@@ -10,6 +10,31 @@ from torchvision.transforms.v2 import Transform ...@@ -10,6 +10,31 @@ from torchvision.transforms.v2 import Transform
class ToTensor(Transform): class ToTensor(Transform):
"""[BETA] Convert a ``PIL Image`` or ``numpy.ndarray`` to tensor.
.. betastatus:: ToTensor transform
.. warning::
:class:`v2.ToTensor` is deprecated and will be removed in a future release.
Please use ``transforms.Compose([transforms.ToImageTensor(), transforms.ConvertImageDtype()])`` instead.
This transform does not support torchscript.
Converts a PIL Image or numpy.ndarray (H x W x C) in the range
[0, 255] to a torch.FloatTensor of shape (C x H x W) in the range [0.0, 1.0]
if the PIL Image belongs to one of the modes (L, LA, P, I, F, RGB, YCbCr, RGBA, CMYK, 1)
or if the numpy.ndarray has dtype = np.uint8
In the other cases, tensors are returned without scaling.
.. note::
Because the input image is scaled to [0.0, 1.0], this transformation should not be used when
transforming target image masks. See the `references`_ for implementing the transforms for image masks.
.. _references: https://github.com/pytorch/vision/tree/main/references/segmentation
"""
_transformed_types = (PIL.Image.Image, np.ndarray) _transformed_types = (PIL.Image.Image, np.ndarray)
def __init__(self) -> None: def __init__(self) -> None:
......
This diff is collapsed.
...@@ -22,6 +22,27 @@ class ConvertBoundingBoxFormat(Transform): ...@@ -22,6 +22,27 @@ class ConvertBoundingBoxFormat(Transform):
class ConvertDtype(Transform): class ConvertDtype(Transform):
"""[BETA] Convert a tensor image/box/mask to the given ``dtype`` and scale the values accordingly
.. betastatus:: ConvertDtype transform
This transform does not support PIL Image.
Args:
dtype (torch.dtype): Desired data type of the output
.. note::
When converting from a smaller to a larger integer ``dtype`` the maximum values are **not** mapped exactly.
If converted back and forth, this mismatch has no effect.
Raises:
RuntimeError: When trying to cast :class:`torch.float32` to :class:`torch.int32` or :class:`torch.int64` as
well as for trying to cast :class:`torch.float64` to :class:`torch.int64`. These conversions might lead to
overflow errors since the floating point ``dtype`` cannot store consecutive integers over the whole range
of the integer ``dtype``.
"""
_v1_transform_cls = _transforms.ConvertImageDtype _v1_transform_cls = _transforms.ConvertImageDtype
_transformed_types = (is_simple_tensor, datapoints.Image, datapoints.Video) _transformed_types = (is_simple_tensor, datapoints.Image, datapoints.Video)
......
...@@ -21,6 +21,16 @@ class Identity(Transform): ...@@ -21,6 +21,16 @@ class Identity(Transform):
class Lambda(Transform): class Lambda(Transform):
"""[BETA] Apply a user-defined lambda as a transform.
.. betastatus:: Lambda transform
This transform does not support torchscript.
Args:
lambd (function): Lambda/function to be used for transform.
"""
def __init__(self, lambd: Callable[[Any], Any], *types: Type): def __init__(self, lambd: Callable[[Any], Any], *types: Type):
super().__init__() super().__init__()
self.lambd = lambd self.lambd = lambd
...@@ -42,6 +52,26 @@ class Lambda(Transform): ...@@ -42,6 +52,26 @@ class Lambda(Transform):
class LinearTransformation(Transform): class LinearTransformation(Transform):
"""[BETA] Transform a tensor image with a square transformation matrix and a mean_vector computed offline.
.. betastatus:: LinearTransformation transform
This transform does not support PIL Image.
Given transformation_matrix and mean_vector, will flatten the torch.*Tensor and
subtract mean_vector from it which is then followed by computing the dot
product with the transformation matrix and then reshaping the tensor to its
original shape.
Applications:
whitening transformation: Suppose X is a column vector of zero-centered data.
Then compute the data covariance matrix [D x D] with torch.mm(X.t(), X),
perform SVD on this matrix and pass it as transformation_matrix.
Args:
transformation_matrix (Tensor): tensor [D x D], D = C x H x W
mean_vector (Tensor): tensor [D], D = C x H x W
"""
_v1_transform_cls = _transforms.LinearTransformation _v1_transform_cls = _transforms.LinearTransformation
_transformed_types = (is_simple_tensor, datapoints.Image, datapoints.Video) _transformed_types = (is_simple_tensor, datapoints.Image, datapoints.Video)
...@@ -105,6 +135,26 @@ class LinearTransformation(Transform): ...@@ -105,6 +135,26 @@ class LinearTransformation(Transform):
class Normalize(Transform): class Normalize(Transform):
"""[BETA] Normalize a tensor image with mean and standard deviation.
.. betastatus:: Normalize transform
This transform does not support PIL Image.
Given mean: ``(mean[1],...,mean[n])`` and std: ``(std[1],...,std[n])`` for ``n``
channels, this transform will normalize each channel of the input
``torch.*Tensor`` i.e.,
``output[channel] = (input[channel] - mean[channel]) / std[channel]``
.. note::
By default, this transform acts out of place, i.e., it does not mutate the input tensor unless ``inplace=True``.
Args:
mean (sequence): Sequence of means for each channel.
std (sequence): Sequence of standard deviations for each channel.
inplace (bool, optional): Bool to make this operation in-place.
"""
_v1_transform_cls = _transforms.Normalize _v1_transform_cls = _transforms.Normalize
_transformed_types = (datapoints.Image, is_simple_tensor, datapoints.Video) _transformed_types = (datapoints.Image, is_simple_tensor, datapoints.Video)
...@@ -125,6 +175,24 @@ class Normalize(Transform): ...@@ -125,6 +175,24 @@ class Normalize(Transform):
class GaussianBlur(Transform): class GaussianBlur(Transform):
"""[BETA] Blurs image with randomly chosen Gaussian blur.
.. betastatus:: GaussianBlur transform
If the image is torch Tensor, it is expected
to have [..., C, H, W] shape, where ... means an arbitrary number of leading dimensions.
Args:
kernel_size (int or sequence): Size of the Gaussian kernel.
sigma (float or tuple of float (min, max)): Standard deviation to be used for
creating kernel to perform blurring. If float, sigma is fixed. If it is tuple
of float (min, max), sigma is chosen uniformly at random to lie in the
given range.
Returns:
PIL Image or Tensor: Gaussian blurred version of the input image.
"""
_v1_transform_cls = _transforms.GaussianBlur _v1_transform_cls = _transforms.GaussianBlur
def __init__( def __init__(
......
...@@ -11,6 +11,15 @@ from torchvision.transforms.v2.utils import is_simple_tensor ...@@ -11,6 +11,15 @@ from torchvision.transforms.v2.utils import is_simple_tensor
class PILToTensor(Transform): class PILToTensor(Transform):
"""[BETA] Convert a ``PIL Image`` to a tensor of the same type.
.. betastatus:: PILToTensor transform
This transform does not support torchscript.
Converts a PIL Image (H x W x C) to a Tensor of shape (C x H x W).
"""
_transformed_types = (PIL.Image.Image,) _transformed_types = (PIL.Image.Image,)
def _transform(self, inpt: PIL.Image.Image, params: Dict[str, Any]) -> torch.Tensor: def _transform(self, inpt: PIL.Image.Image, params: Dict[str, Any]) -> torch.Tensor:
...@@ -27,6 +36,27 @@ class ToImageTensor(Transform): ...@@ -27,6 +36,27 @@ class ToImageTensor(Transform):
class ToImagePIL(Transform): class ToImagePIL(Transform):
"""[BETA] Convert a tensor or an ndarray to PIL Image.
.. betastatus:: ToImagePIL transform
This transform does not support torchscript.
Converts a torch.*Tensor of shape C x H x W or a numpy ndarray of shape
H x W x C to a PIL Image while preserving the value range.
Args:
mode (`PIL.Image mode`_): color space and pixel depth of input data (optional).
If ``mode`` is ``None`` (default) there are some assumptions made about the input data:
- If the input has 4 channels, the ``mode`` is assumed to be ``RGBA``.
- If the input has 3 channels, the ``mode`` is assumed to be ``RGB``.
- If the input has 2 channels, the ``mode`` is assumed to be ``LA``.
- If the input has 1 channel, the ``mode`` is determined by the data type (i.e. ``int``, ``float``,
``short``).
.. _PIL.Image mode: https://pillow.readthedocs.io/en/latest/handbook/concepts.html#concept-modes
"""
_transformed_types = (is_simple_tensor, datapoints.Image, np.ndarray) _transformed_types = (is_simple_tensor, datapoints.Image, np.ndarray)
def __init__(self, mode: Optional[str] = None) -> None: def __init__(self, mode: Optional[str] = None) -> None:
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment