Unverified Commit 928b05ca authored by Nicolas Hug's avatar Nicolas Hug Committed by GitHub
Browse files

Added docs for v2 transforms (part 1) (#7297)


Co-authored-by: default avatarvfdev <vfdev.5@gmail.com>
Co-authored-by: default avatarPhilip Meier <github.pmeier@posteo.de>
parent d03b776a
......@@ -33,6 +33,8 @@ from tabulate import tabulate
sys.path.append(os.path.abspath("."))
torchvision.disable_beta_transforms_warning()
# -- General configuration ------------------------------------------------
# Required version of sphinx is set from docs/requirements.txt
......
......@@ -98,17 +98,29 @@ Geometry
:template: class.rst
Resize
v2.Resize
RandomCrop
v2.RandomCrop
RandomResizedCrop
v2.RandomResizedCrop
CenterCrop
v2.CenterCrop
FiveCrop
v2.FiveCrop
TenCrop
v2.TenCrop
Pad
v2.Pad
RandomAffine
v2.RandomAffine
RandomPerspective
v2.RandomPerspective
RandomRotation
v2.RandomRotation
RandomHorizontalFlip
v2.RandomHorizontalFlip
RandomVerticalFlip
v2.RandomVerticalFlip
Color
-----
......@@ -118,15 +130,25 @@ Color
:template: class.rst
ColorJitter
v2.ColorJitter
Grayscale
v2.Grayscale
RandomGrayscale
v2.RandomGrayscale
GaussianBlur
v2.GaussianBlur
RandomInvert
v2.RandomInvert
RandomPosterize
v2.RandomPosterize
RandomSolarize
v2.RandomSolarize
RandomAdjustSharpness
v2.RandomAdjustSharpness
RandomAutocontrast
v2.RandomAutocontrast
RandomEqualize
v2.RandomEqualize
Composition
-----------
......@@ -136,9 +158,13 @@ Composition
:template: class.rst
Compose
v2.Compose
RandomApply
v2.RandomApply
RandomChoice
v2.RandomChoice
RandomOrder
v2.RandomOrder
Miscellaneous
-------------
......@@ -148,9 +174,13 @@ Miscellaneous
:template: class.rst
LinearTransformation
v2.LinearTransformation
Normalize
v2.Normalize
RandomErasing
v2.RandomErasing
Lambda
v2.Lambda
.. _conversion_transforms:
......@@ -162,9 +192,15 @@ Conversion
:template: class.rst
ToPILImage
v2.ToPILImage
v2.ToImagePIL
ToTensor
v2.ToTensor
PILToTensor
v2.PILToTensor
ConvertImageDtype
v2.ConvertImageDtype
v2.ConvertDtype
Auto-Augmentation
-----------------
......@@ -181,9 +217,13 @@ The new transform can be used standalone or mixed-and-matched with existing tran
AutoAugmentPolicy
AutoAugment
v2.AutoAugment
RandAugment
v2.RandAugment
TrivialAugmentWide
v2.TrivialAugmentWide
AugMix
v2.AugMix
.. _functional_transforms:
......
......@@ -13,6 +13,38 @@ from .utils import is_simple_tensor, query_chw
class RandomErasing(_RandomApplyTransform):
"""[BETA] Randomly selects a rectangle region in the input image or video and erases its pixels.
.. betastatus:: RandomErasing transform
This transform does not support PIL Image.
'Random Erasing Data Augmentation' by Zhong et al. See https://arxiv.org/abs/1708.04896
Args:
p: probability that the random erasing operation will be performed.
scale: range of proportion of erased area against input image.
ratio: range of aspect ratio of erased area.
value: erasing value. Default is 0. If a single int, it is used to
erase all pixels. If a tuple of length 3, it is used to erase
R, G, B channels respectively.
If a str of 'random', erasing each pixel with random values.
inplace: boolean to make this transform inplace. Default set to False.
Returns:
Erased input.
Example:
>>> from torchvision.transforms import v2 as transforms
>>>
>>> transform = transforms.Compose([
>>> transforms.RandomHorizontalFlip(),
>>> transforms.PILToTensor(),
>>> transforms.ConvertImageDtype(torch.float),
>>> transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)),
>>> transforms.RandomErasing(),
>>> ])
"""
_v1_transform_cls = _transforms.RandomErasing
def _extract_params_for_v1_transform(self) -> Dict[str, Any]:
......
......@@ -162,6 +162,24 @@ class _AutoAugmentBase(Transform):
class AutoAugment(_AutoAugmentBase):
r"""[BETA] AutoAugment data augmentation method based on
`"AutoAugment: Learning Augmentation Strategies from Data" <https://arxiv.org/pdf/1805.09501.pdf>`_.
.. betastatus:: AutoAugment transform
If the image is torch Tensor, it should be of type torch.uint8, and it is expected
to have [..., 1 or 3, H, W] shape, where ... means an arbitrary number of leading dimensions.
If img is PIL Image, it is expected to be in mode "L" or "RGB".
Args:
policy (AutoAugmentPolicy): Desired policy enum defined by
:class:`torchvision.transforms.autoaugment.AutoAugmentPolicy`. Default is ``AutoAugmentPolicy.IMAGENET``.
interpolation (InterpolationMode): Desired interpolation enum defined by
:class:`torchvision.transforms.InterpolationMode`. Default is ``InterpolationMode.NEAREST``.
If input is Tensor, only ``InterpolationMode.NEAREST``, ``InterpolationMode.BILINEAR`` are supported.
fill (sequence or number, optional): Pixel fill value for the area outside the transformed
image. If given a number, the value is used for all bands respectively.
"""
_v1_transform_cls = _transforms.AutoAugment
_AUGMENTATION_SPACE = {
......@@ -318,6 +336,27 @@ class AutoAugment(_AutoAugmentBase):
class RandAugment(_AutoAugmentBase):
r"""[BETA] RandAugment data augmentation method based on
`"RandAugment: Practical automated data augmentation with a reduced search space"
<https://arxiv.org/abs/1909.13719>`_.
.. betastatus:: RandAugment transform
If the image is torch Tensor, it should be of type torch.uint8, and it is expected
to have [..., 1 or 3, H, W] shape, where ... means an arbitrary number of leading dimensions.
If img is PIL Image, it is expected to be in mode "L" or "RGB".
Args:
num_ops (int): Number of augmentation transformations to apply sequentially.
magnitude (int): Magnitude for all the transformations.
num_magnitude_bins (int): The number of different magnitude values.
interpolation (InterpolationMode): Desired interpolation enum defined by
:class:`torchvision.transforms.InterpolationMode`. Default is ``InterpolationMode.NEAREST``.
If input is Tensor, only ``InterpolationMode.NEAREST``, ``InterpolationMode.BILINEAR`` are supported.
fill (sequence or number, optional): Pixel fill value for the area outside the transformed
image. If given a number, the value is used for all bands respectively.
"""
_v1_transform_cls = _transforms.RandAugment
_AUGMENTATION_SPACE = {
"Identity": (lambda num_bins, height, width: None, False),
......@@ -379,6 +418,24 @@ class RandAugment(_AutoAugmentBase):
class TrivialAugmentWide(_AutoAugmentBase):
r"""[BETA] Dataset-independent data-augmentation with TrivialAugment Wide, as described in
`"TrivialAugment: Tuning-free Yet State-of-the-Art Data Augmentation" <https://arxiv.org/abs/2103.10158>`_.
.. betastatus:: TrivialAugmentWide transform
If the image is torch Tensor, it should be of type torch.uint8, and it is expected
to have [..., 1 or 3, H, W] shape, where ... means an arbitrary number of leading dimensions.
If img is PIL Image, it is expected to be in mode "L" or "RGB".
Args:
num_magnitude_bins (int): The number of different magnitude values.
interpolation (InterpolationMode): Desired interpolation enum defined by
:class:`torchvision.transforms.InterpolationMode`. Default is ``InterpolationMode.NEAREST``.
If input is Tensor, only ``InterpolationMode.NEAREST``, ``InterpolationMode.BILINEAR`` are supported.
fill (sequence or number, optional): Pixel fill value for the area outside the transformed
image. If given a number, the value is used for all bands respectively.
"""
_v1_transform_cls = _transforms.TrivialAugmentWide
_AUGMENTATION_SPACE = {
"Identity": (lambda num_bins, height, width: None, False),
......@@ -430,6 +487,29 @@ class TrivialAugmentWide(_AutoAugmentBase):
class AugMix(_AutoAugmentBase):
r"""[BETA] AugMix data augmentation method based on
`"AugMix: A Simple Data Processing Method to Improve Robustness and Uncertainty" <https://arxiv.org/abs/1912.02781>`_.
.. betastatus:: AugMix transform
If the image is torch Tensor, it should be of type torch.uint8, and it is expected
to have [..., 1 or 3, H, W] shape, where ... means an arbitrary number of leading dimensions.
If img is PIL Image, it is expected to be in mode "L" or "RGB".
Args:
severity (int): The severity of base augmentation operators. Default is ``3``.
mixture_width (int): The number of augmentation chains. Default is ``3``.
chain_depth (int): The depth of augmentation chains. A negative value denotes stochastic depth sampled from the interval [1, 3].
Default is ``-1``.
alpha (float): The hyperparameter for the probability distributions. Default is ``1.0``.
all_ops (bool): Use all operations (including brightness, contrast, color and sharpness). Default is ``True``.
interpolation (InterpolationMode): Desired interpolation enum defined by
:class:`torchvision.transforms.InterpolationMode`. Default is ``InterpolationMode.NEAREST``.
If input is Tensor, only ``InterpolationMode.NEAREST``, ``InterpolationMode.BILINEAR`` are supported.
fill (sequence or number, optional): Pixel fill value for the area outside the transformed
image. If given a number, the value is used for all bands respectively.
"""
_v1_transform_cls = _transforms.AugMix
_PARTIAL_AUGMENTATION_SPACE = {
......
......@@ -11,6 +11,23 @@ from .utils import is_simple_tensor, query_chw
class Grayscale(Transform):
"""[BETA] Convert images or videos to grayscale.
.. betastatus:: Grayscale transform
If the image is torch Tensor, it is expected
to have [..., 3, H, W] shape, where ... means an arbitrary number of leading dimensions
Args:
num_output_channels (int): (1 or 3) number of channels desired for output image
Returns:
PIL Image: Grayscale version of the input.
- If ``num_output_channels == 1`` : returned image is single channel
- If ``num_output_channels == 3`` : returned image is 3 channel with r == g == b
"""
_v1_transform_cls = _transforms.Grayscale
_transformed_types = (
......@@ -29,6 +46,24 @@ class Grayscale(Transform):
class RandomGrayscale(_RandomApplyTransform):
"""[BETA] Randomly convert image to grayscale with a probability of p (default 0.1).
.. betastatus:: RandomGrayscale transform
If the image is torch Tensor, it is expected
to have [..., 3, H, W] shape, where ... means an arbitrary number of leading dimensions
Args:
p (float): probability that image should be converted to grayscale.
Returns:
PIL Image or Tensor: Grayscale version of the input image with probability p and unchanged
with probability (1-p).
- If input image is 1 channel: grayscale version is 1 channel
- If input image is 3 channel: grayscale version is 3 channel with r == g == b
"""
_v1_transform_cls = _transforms.RandomGrayscale
_transformed_types = (
......@@ -50,6 +85,32 @@ class RandomGrayscale(_RandomApplyTransform):
class ColorJitter(Transform):
"""[BETA] Randomly change the brightness, contrast, saturation and hue of an image.
.. betastatus:: ColorJitter transform
If the image is torch Tensor, it is expected
to have [..., 1 or 3, H, W] shape, where ... means an arbitrary number of leading dimensions.
If img is PIL Image, mode "1", "I", "F" and modes with transparency (alpha channel) are not supported.
Args:
brightness (float or tuple of float (min, max)): How much to jitter brightness.
brightness_factor is chosen uniformly from [max(0, 1 - brightness), 1 + brightness]
or the given [min, max]. Should be non negative numbers.
contrast (float or tuple of float (min, max)): How much to jitter contrast.
contrast_factor is chosen uniformly from [max(0, 1 - contrast), 1 + contrast]
or the given [min, max]. Should be non-negative numbers.
saturation (float or tuple of float (min, max)): How much to jitter saturation.
saturation_factor is chosen uniformly from [max(0, 1 - saturation), 1 + saturation]
or the given [min, max]. Should be non negative numbers.
hue (float or tuple of float (min, max)): How much to jitter hue.
hue_factor is chosen uniformly from [-hue, hue] or the given [min, max].
Should have 0<= hue <= 0.5 or -0.5 <= min <= max <= 0.5.
To jitter hue, the pixel values of the input image has to be non-negative for conversion to HSV space;
thus it does not work if you normalize your image to an interval with negative values,
or use an interpolation that generates negative values before using this function.
"""
_v1_transform_cls = _transforms.ColorJitter
def _extract_params_for_v1_transform(self) -> Dict[str, Any]:
......@@ -205,6 +266,18 @@ class RandomPhotometricDistort(Transform):
class RandomEqualize(_RandomApplyTransform):
"""[BETA] Equalize the histogram of the given image randomly with a given probability.
.. betastatus:: RandomEqualize transform
If the image is torch Tensor, it is expected
to have [..., 1 or 3, H, W] shape, where ... means an arbitrary number of leading dimensions.
If img is PIL Image, it is expected to be in mode "P", "L" or "RGB".
Args:
p (float): probability of the image being equalized. Default value is 0.5
"""
_v1_transform_cls = _transforms.RandomEqualize
def _transform(self, inpt: Any, params: Dict[str, Any]) -> Any:
......@@ -212,6 +285,18 @@ class RandomEqualize(_RandomApplyTransform):
class RandomInvert(_RandomApplyTransform):
"""[BETA] Inverts the colors of the given image randomly with a given probability.
.. betastatus:: RandomInvert transform
If img is a Tensor, it is expected to be in [..., 1 or 3, H, W] format,
where ... means it can have an arbitrary number of leading dimensions.
If img is PIL Image, it is expected to be in mode "L" or "RGB".
Args:
p (float): probability of the image being color inverted. Default value is 0.5
"""
_v1_transform_cls = _transforms.RandomInvert
def _transform(self, inpt: Any, params: Dict[str, Any]) -> Any:
......@@ -219,6 +304,20 @@ class RandomInvert(_RandomApplyTransform):
class RandomPosterize(_RandomApplyTransform):
"""[BETA] Posterize the image randomly with a given probability by reducing the
number of bits for each color channel.
.. betastatus:: RandomPosterize transform
If the image is torch Tensor, it should be of type torch.uint8,
and it is expected to have [..., 1 or 3, H, W] shape, where ... means an arbitrary number of leading dimensions.
If img is PIL Image, it is expected to be in mode "L" or "RGB".
Args:
bits (int): number of bits to keep for each channel (0-8)
p (float): probability of the image being posterized. Default value is 0.5
"""
_v1_transform_cls = _transforms.RandomPosterize
def __init__(self, bits: int, p: float = 0.5) -> None:
......@@ -230,6 +329,20 @@ class RandomPosterize(_RandomApplyTransform):
class RandomSolarize(_RandomApplyTransform):
"""[BETA] Solarize the image randomly with a given probability by inverting all pixel
values above a threshold.
.. betastatus:: RandomSolarize transform
If img is a Tensor, it is expected to be in [..., 1 or 3, H, W] format,
where ... means it can have an arbitrary number of leading dimensions.
If img is PIL Image, it is expected to be in mode "L" or "RGB".
Args:
threshold (float): all pixels equal or above this value are inverted.
p (float): probability of the image being solarized. Default value is 0.5
"""
_v1_transform_cls = _transforms.RandomSolarize
def __init__(self, threshold: float, p: float = 0.5) -> None:
......@@ -241,6 +354,18 @@ class RandomSolarize(_RandomApplyTransform):
class RandomAutocontrast(_RandomApplyTransform):
"""[BETA] Autocontrast the pixels of the given image randomly with a given probability.
.. betastatus:: RandomAutocontrast transform
If the image is torch Tensor, it is expected
to have [..., 1 or 3, H, W] shape, where ... means an arbitrary number of leading dimensions.
If img is PIL Image, it is expected to be in mode "L" or "RGB".
Args:
p (float): probability of the image being autocontrasted. Default value is 0.5
"""
_v1_transform_cls = _transforms.RandomAutocontrast
def _transform(self, inpt: Any, params: Dict[str, Any]) -> Any:
......@@ -248,6 +373,20 @@ class RandomAutocontrast(_RandomApplyTransform):
class RandomAdjustSharpness(_RandomApplyTransform):
"""[BETA] Adjust the sharpness of the image randomly with a given probability.
.. betastatus:: RandomAdjustSharpness transform
If the image is torch Tensor,
it is expected to have [..., 1 or 3, H, W] shape, where ... means an arbitrary number of leading dimensions.
Args:
sharpness_factor (float): How much to adjust the sharpness. Can be
any non-negative number. 0 gives a blurred image, 1 gives the
original image while 2 increases the sharpness by a factor of 2.
p (float): probability of the image being sharpened. Default value is 0.5
"""
_v1_transform_cls = _transforms.RandomAdjustSharpness
def __init__(self, sharpness_factor: float, p: float = 0.5) -> None:
......
......@@ -9,6 +9,37 @@ from torchvision.transforms.v2 import Transform
class Compose(Transform):
"""[BETA] Composes several transforms together.
.. betastatus:: Compose transform
This transform does not support torchscript.
Please, see the note below.
Args:
transforms (list of ``Transform`` objects): list of transforms to compose.
Example:
>>> transforms.Compose([
>>> transforms.CenterCrop(10),
>>> transforms.PILToTensor(),
>>> transforms.ConvertImageDtype(torch.float),
>>> ])
.. note::
In order to script the transformations, please use ``torch.nn.Sequential`` as below.
>>> transforms = torch.nn.Sequential(
>>> transforms.CenterCrop(10),
>>> transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)),
>>> )
>>> scripted_transforms = torch.jit.script(transforms)
Make sure to use only scriptable transformations, i.e. that work with ``torch.Tensor``, does not require
`lambda` functions or ``PIL.Image``.
"""
def __init__(self, transforms: Sequence[Callable]) -> None:
super().__init__()
if not isinstance(transforms, Sequence):
......@@ -29,6 +60,27 @@ class Compose(Transform):
class RandomApply(Transform):
"""[BETA] Apply randomly a list of transformations with a given probability.
.. betastatus:: RandomApply transform
.. note::
In order to script the transformation, please use ``torch.nn.ModuleList`` as input instead of list/tuple of
transforms as shown below:
>>> transforms = transforms.RandomApply(torch.nn.ModuleList([
>>> transforms.ColorJitter(),
>>> ]), p=0.3)
>>> scripted_transforms = torch.jit.script(transforms)
Make sure to use only scriptable transformations, i.e. that work with ``torch.Tensor``, does not require
`lambda` functions or ``PIL.Image``.
Args:
transforms (sequence or torch.nn.Module): list of transformations
p (float): probability
"""
_v1_transform_cls = _transforms.RandomApply
def __init__(self, transforms: Union[Sequence[Callable], nn.ModuleList], p: float = 0.5) -> None:
......@@ -63,6 +115,12 @@ class RandomApply(Transform):
class RandomChoice(Transform):
"""[BETA] Apply single transformation randomly picked from a list.
.. betastatus:: RandomChoice transform
This transform does not support torchscript."""
def __init__(
self,
transforms: Sequence[Callable],
......@@ -99,6 +157,13 @@ class RandomChoice(Transform):
class RandomOrder(Transform):
"""[BETA] Apply a list of transformations in a random order.
.. betastatus:: RandomOrder transform
This transform does not support torchscript.
"""
def __init__(self, transforms: Sequence[Callable]) -> None:
if not isinstance(transforms, Sequence):
raise TypeError("Argument transforms should be a sequence of callables")
......
......@@ -10,6 +10,31 @@ from torchvision.transforms.v2 import Transform
class ToTensor(Transform):
"""[BETA] Convert a ``PIL Image`` or ``numpy.ndarray`` to tensor.
.. betastatus:: ToTensor transform
.. warning::
:class:`v2.ToTensor` is deprecated and will be removed in a future release.
Please use instead ``transforms.Compose([transforms.ToImageTensor(), transforms.ConvertImageDtype()])``.
This transform does not support torchscript.
Converts a PIL Image or numpy.ndarray (H x W x C) in the range
[0, 255] to a torch.FloatTensor of shape (C x H x W) in the range [0.0, 1.0]
if the PIL Image belongs to one of the modes (L, LA, P, I, F, RGB, YCbCr, RGBA, CMYK, 1)
or if the numpy.ndarray has dtype = np.uint8
In the other cases, tensors are returned without scaling.
.. note::
Because the input image is scaled to [0.0, 1.0], this transformation should not be used when
transforming target image masks. See the `references`_ for implementing the transforms for image masks.
.. _references: https://github.com/pytorch/vision/tree/main/references/segmentation
"""
_transformed_types = (PIL.Image.Image, np.ndarray)
def __init__(self) -> None:
......
This diff is collapsed.
......@@ -22,6 +22,27 @@ class ConvertBoundingBoxFormat(Transform):
class ConvertDtype(Transform):
"""[BETA] Convert a tensor image/box/mask to the given ``dtype`` and scale the values accordingly
.. betastatus:: ConvertDtype transform
This function does not support PIL Image.
Args:
dtype (torch.dtype): Desired data type of the output
.. note::
When converting from a smaller to a larger integer ``dtype`` the maximum values are **not** mapped exactly.
If converted back and forth, this mismatch has no effect.
Raises:
RuntimeError: When trying to cast :class:`torch.float32` to :class:`torch.int32` or :class:`torch.int64` as
well as for trying to cast :class:`torch.float64` to :class:`torch.int64`. These conversions might lead to
overflow errors since the floating point ``dtype`` cannot store consecutive integers over the whole range
of the integer ``dtype``.
"""
_v1_transform_cls = _transforms.ConvertImageDtype
_transformed_types = (is_simple_tensor, datapoints.Image, datapoints.Video)
......
......@@ -21,6 +21,16 @@ class Identity(Transform):
class Lambda(Transform):
"""[BETA] Apply a user-defined lambda as a transform.
.. betastatus:: Lambda transform
This transform does not support torchscript.
Args:
lambd (function): Lambda/function to be used for transform.
"""
def __init__(self, lambd: Callable[[Any], Any], *types: Type):
super().__init__()
self.lambd = lambd
......@@ -42,6 +52,26 @@ class Lambda(Transform):
class LinearTransformation(Transform):
"""[BETA] Transform a tensor image with a square transformation matrix and a mean_vector computed offline.
.. betastatus:: LinearTransformation transform
This transform does not support PIL Image.
Given transformation_matrix and mean_vector, will flatten the torch.*Tensor and
subtract mean_vector from it which is then followed by computing the dot
product with the transformation matrix and then reshaping the tensor to its
original shape.
Applications:
whitening transformation: Suppose X is a column vector zero-centered data.
Then compute the data covariance matrix [D x D] with torch.mm(X.t(), X),
perform SVD on this matrix and pass it as transformation_matrix.
Args:
transformation_matrix (Tensor): tensor [D x D], D = C x H x W
mean_vector (Tensor): tensor [D], D = C x H x W
"""
_v1_transform_cls = _transforms.LinearTransformation
_transformed_types = (is_simple_tensor, datapoints.Image, datapoints.Video)
......@@ -105,6 +135,26 @@ class LinearTransformation(Transform):
class Normalize(Transform):
"""[BETA] Normalize a tensor image with mean and standard deviation.
.. betastatus:: Normalize transform
This transform does not support PIL Image.
Given mean: ``(mean[1],...,mean[n])`` and std: ``(std[1],..,std[n])`` for ``n``
channels, this transform will normalize each channel of the input
``torch.*Tensor`` i.e.,
``output[channel] = (input[channel] - mean[channel]) / std[channel]``
.. note::
This transform acts out of place, i.e., it does not mutate the input tensor.
Args:
mean (sequence): Sequence of means for each channel.
std (sequence): Sequence of standard deviations for each channel.
inplace(bool,optional): Bool to make this operation in-place.
"""
_v1_transform_cls = _transforms.Normalize
_transformed_types = (datapoints.Image, is_simple_tensor, datapoints.Video)
......@@ -125,6 +175,24 @@ class Normalize(Transform):
class GaussianBlur(Transform):
"""[BETA] Blurs image with randomly chosen Gaussian blur.
.. betastatus:: GausssianBlur transform
If the image is torch Tensor, it is expected
to have [..., C, H, W] shape, where ... means an arbitrary number of leading dimensions.
Args:
kernel_size (int or sequence): Size of the Gaussian kernel.
sigma (float or tuple of float (min, max)): Standard deviation to be used for
creating kernel to perform blurring. If float, sigma is fixed. If it is tuple
of float (min, max), sigma is chosen uniformly at random to lie in the
given range.
Returns:
PIL Image or Tensor: Gaussian blurred version of the input image.
"""
_v1_transform_cls = _transforms.GaussianBlur
def __init__(
......
......@@ -11,6 +11,15 @@ from torchvision.transforms.v2.utils import is_simple_tensor
class PILToTensor(Transform):
"""[BETA] Convert a ``PIL Image`` to a tensor of the same type.
.. betastatus:: PILToTensor transform
This transform does not support torchscript.
Converts a PIL Image (H x W x C) to a Tensor of shape (C x H x W).
"""
_transformed_types = (PIL.Image.Image,)
def _transform(self, inpt: PIL.Image.Image, params: Dict[str, Any]) -> torch.Tensor:
......@@ -27,6 +36,27 @@ class ToImageTensor(Transform):
class ToImagePIL(Transform):
"""[BETA] Convert a tensor or an ndarray to PIL Image.
.. betastatus:: ToImagePIL transform
This transform does not support torchscript.
Converts a torch.*Tensor of shape C x H x W or a numpy ndarray of shape
H x W x C to a PIL Image while preserving the value range.
Args:
mode (`PIL.Image mode`_): color space and pixel depth of input data (optional).
If ``mode`` is ``None`` (default) there are some assumptions made about the input data:
- If the input has 4 channels, the ``mode`` is assumed to be ``RGBA``.
- If the input has 3 channels, the ``mode`` is assumed to be ``RGB``.
- If the input has 2 channels, the ``mode`` is assumed to be ``LA``.
- If the input has 1 channel, the ``mode`` is determined by the data type (i.e ``int``, ``float``,
``short``).
.. _PIL.Image mode: https://pillow.readthedocs.io/en/latest/handbook/concepts.html#concept-modes
"""
_transformed_types = (is_simple_tensor, datapoints.Image, np.ndarray)
def __init__(self, mode: Optional[str] = None) -> None:
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment