Commit cc26cd81 authored by panning
merge v0.16.0
parents f78f29f5 fbb4cc54
.. _transforms_gallery:
Transforms
----------
import matplotlib.pyplot as plt
import torch
from torchvision.utils import draw_bounding_boxes, draw_segmentation_masks
from torchvision import tv_tensors
from torchvision.transforms.v2 import functional as F
def plot(imgs, row_title=None, **imshow_kwargs):
if not isinstance(imgs[0], list):
# Make a 2d grid even if there's just 1 row
imgs = [imgs]
num_rows = len(imgs)
num_cols = len(imgs[0])
_, axs = plt.subplots(nrows=num_rows, ncols=num_cols, squeeze=False)
for row_idx, row in enumerate(imgs):
for col_idx, img in enumerate(row):
boxes = None
masks = None
if isinstance(img, tuple):
img, target = img
if isinstance(target, dict):
boxes = target.get("boxes")
masks = target.get("masks")
elif isinstance(target, tv_tensors.BoundingBoxes):
boxes = target
else:
raise ValueError(f"Unexpected target type: {type(target)}")
img = F.to_image(img)
if img.dtype.is_floating_point and img.min() < 0:
# Poor man's re-normalization for the colors to be OK-ish. This
# is useful for images coming out of Normalize()
img -= img.min()
img /= img.max()
img = F.to_dtype(img, torch.uint8, scale=True)
if boxes is not None:
img = draw_bounding_boxes(img, boxes, colors="yellow", width=3)
if masks is not None:
img = draw_segmentation_masks(img, masks.to(torch.bool), colors=["green"] * masks.shape[0], alpha=.65)
ax = axs[row_idx, col_idx]
ax.imshow(img.permute(1, 2, 0).numpy(), **imshow_kwargs)
ax.set(xticklabels=[], yticklabels=[], xticks=[], yticks=[])
if row_title is not None:
for row_idx in range(num_rows):
axs[row_idx, 0].set(ylabel=row_title[row_idx])
plt.tight_layout()
"""
===================================
How to write your own v2 transforms
===================================
.. note::
Try on `Colab <https://colab.research.google.com/github/pytorch/vision/blob/gh-pages/main/_generated_ipynb_notebooks/plot_custom_transforms.ipynb>`_
or :ref:`go to the end <sphx_glr_download_auto_examples_transforms_plot_custom_transforms.py>` to download the full example code.
This guide explains how to write transforms that are compatible with the
torchvision transforms V2 API.
"""
# %%
import torch
from torchvision import tv_tensors
from torchvision.transforms import v2
# %%
# Just create a ``nn.Module`` and override the ``forward`` method
# ===============================================================
#
# In most cases, this is all you're going to need, as long as you already know
# the structure of the input that your transform will expect. For example if
# you're just doing image classification, your transform will typically accept a
# single image as input, or a ``(img, label)`` input. So you can just hard-code
# your ``forward`` method to accept just that, e.g.
#
# .. code:: python
#
# class MyCustomTransform(torch.nn.Module):
# def forward(self, img, label):
# # Do some transformations
# return new_img, new_label
#
# .. note::
#
# This means that if you have a custom transform that is already compatible
# with the V1 transforms (those in ``torchvision.transforms``), it will
# still work with the V2 transforms without any change!
#
# We will illustrate this more completely below with a typical detection case,
# where our samples are just images, bounding boxes and labels:
class MyCustomTransform(torch.nn.Module):
def forward(self, img, bboxes, label): # we assume inputs are always structured like this
print(
f"I'm transforming an image of shape {img.shape} "
f"with bboxes = {bboxes}\n{label = }"
)
# Do some transformations. Here, we're just passing through the input
return img, bboxes, label
transforms = v2.Compose([
MyCustomTransform(),
v2.RandomResizedCrop((224, 224), antialias=True),
v2.RandomHorizontalFlip(p=1),
v2.Normalize(mean=[0, 0, 0], std=[1, 1, 1])
])
H, W = 256, 256
img = torch.rand(3, H, W)
bboxes = tv_tensors.BoundingBoxes(
torch.tensor([[0, 10, 10, 20], [50, 50, 70, 70]]),
format="XYXY",
canvas_size=(H, W)
)
label = 3
out_img, out_bboxes, out_label = transforms(img, bboxes, label)
# %%
print(f"Output image shape: {out_img.shape}\nout_bboxes = {out_bboxes}\n{out_label = }")
# %%
# .. note::
# While working with TVTensor classes in your code, make sure to
# familiarize yourself with this section:
# :ref:`tv_tensor_unwrapping_behaviour`
#
# Supporting arbitrary input structures
# =====================================
#
# In the section above, we have assumed that you already know the structure of
# your inputs and that you're OK with hard-coding this expected structure in
# your code. If you want your custom transforms to be as flexible as possible,
# this can be a bit limiting.
#
# A key feature of the builtin Torchvision V2 transforms is that they can accept
# arbitrary input structure and return the same structure as output (with
# transformed entries). For example, transforms can accept a single image, or a
# tuple of ``(img, label)``, or an arbitrary nested dictionary as input:
structured_input = {
"img": img,
"annotations": (bboxes, label),
"something_that_will_be_ignored": (1, "hello")
}
structured_output = v2.RandomHorizontalFlip(p=1)(structured_input)
assert isinstance(structured_output, dict)
assert structured_output["something_that_will_be_ignored"] == (1, "hello")
print(f"The transformed bboxes are:\n{structured_output['annotations'][0]}")
# %%
# If you want to reproduce this behavior in your own transform, we invite you to
# look at our `code
# <https://github.com/pytorch/vision/blob/main/torchvision/transforms/v2/_transform.py>`_
# and adapt it to your needs.
#
# In brief, the core logic is to unpack the input into a flat list using `pytree
# <https://github.com/pytorch/pytorch/blob/main/torch/utils/_pytree.py>`_, and
# then transform only the entries that can be transformed (the decision is made
# based on the **class** of the entries, as all TVTensors are
# tensor subclasses), plus some custom logic that is out of scope here - check the
# code for details. The (potentially transformed) entries are then repacked and
# returned, in the same structure as the input.
#
# We do not provide public dev-facing tools to achieve that at this time, but if
# this is something that would be valuable to you, please let us know by opening
# an issue on our `GitHub repo <https://github.com/pytorch/vision/issues>`_.
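# %%
# To make this a little more concrete, below is a minimal sketch of that
# pattern. It is *not* the actual torchvision implementation, and it relies on
# the private ``torch.utils._pytree`` module, so treat it as an illustration
# only: flatten the input, transform only the entries whose class we care
# about (here, plain tensors), and repack everything else untouched.
from torch.utils import _pytree as pytree


class NegateTensorsOnly(torch.nn.Module):
    # Toy transform: negates every tensor entry, passes everything else through.
    def forward(self, *inputs):
        sample = inputs if len(inputs) > 1 else inputs[0]
        flat, spec = pytree.tree_flatten(sample)  # arbitrary structure -> flat list
        # The decision of what to transform is based on the class of each entry.
        flat = [-entry if isinstance(entry, torch.Tensor) else entry for entry in flat]
        return pytree.tree_unflatten(flat, spec)  # flat list -> original structure


out = NegateTensorsOnly()(structured_input)
assert isinstance(out, dict)
assert out["something_that_will_be_ignored"] == (1, "hello")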
"""
====================================
How to write your own TVTensor class
====================================
.. note::
Try on `Colab <https://colab.research.google.com/github/pytorch/vision/blob/gh-pages/main/_generated_ipynb_notebooks/plot_custom_tv_tensors.ipynb>`_
or :ref:`go to the end <sphx_glr_download_auto_examples_transforms_plot_custom_tv_tensors.py>` to download the full example code.
This guide is intended for advanced users and downstream library maintainers. We explain how to
write your own TVTensor class, and how to make it compatible with the built-in
Torchvision v2 transforms. Before continuing, make sure you have read
:ref:`sphx_glr_auto_examples_transforms_plot_tv_tensors.py`.
"""
# %%
import torch
from torchvision import tv_tensors
from torchvision.transforms import v2
# %%
# We will create a very simple class that just inherits from the base
# :class:`~torchvision.tv_tensors.TVTensor` class. It will be enough to cover
# what you need to know to implement your more elaborate use cases. If you need
# to create a class that carries meta-data, take a look at how the
# :class:`~torchvision.tv_tensors.BoundingBoxes` class is `implemented
# <https://github.com/pytorch/vision/blob/main/torchvision/tv_tensors/_bounding_box.py>`_.
class MyTVTensor(tv_tensors.TVTensor):
pass
my_dp = MyTVTensor([1, 2, 3])
my_dp
# %%
# Now that we have defined our custom TVTensor class, we want it to be
# compatible with the built-in torchvision transforms, and the functional API.
# For that, we need to implement a kernel which performs the core of the
# transformation, and then "hook" it to the functional that we want to support
# via :func:`~torchvision.transforms.v2.functional.register_kernel`.
#
# We illustrate this process below: we create a kernel for the "horizontal flip"
# operation of our MyTVTensor class, and register it to the functional API.
from torchvision.transforms.v2 import functional as F
@F.register_kernel(functional="hflip", tv_tensor_cls=MyTVTensor)
def hflip_my_tv_tensor(my_dp, *args, **kwargs):
print("Flipping!")
out = my_dp.flip(-1)
return tv_tensors.wrap(out, like=my_dp)
# %%
# To understand why :func:`~torchvision.tv_tensors.wrap` is used, see
# :ref:`tv_tensor_unwrapping_behaviour`. Ignore the ``*args, **kwargs`` for now;
# we will explain them below in :ref:`param_forwarding`.
#
# .. note::
#
# In our call to ``register_kernel`` above we used a string
# ``functional="hflip"`` to refer to the functional we want to hook into. We
# could also have used the functional *itself*, i.e.
# ``@register_kernel(functional=F.hflip, ...)``.
#
# Now that we have registered our kernel, we can call the functional API on a
# ``MyTVTensor`` instance:
my_dp = MyTVTensor(torch.rand(3, 256, 256))
_ = F.hflip(my_dp)
# %%
# And we can also use the
# :class:`~torchvision.transforms.v2.RandomHorizontalFlip` transform, since it relies on :func:`~torchvision.transforms.v2.functional.hflip` internally:
t = v2.RandomHorizontalFlip(p=1)
_ = t(my_dp)
# %%
# .. note::
#
# We cannot register a kernel for a transform class; we can only register a
# kernel for a **functional**. The reason we can't register a transform
# class is because one transform may internally rely on more than one
# functional, so in general we can't register a single kernel for a given
# class.
#
# .. _param_forwarding:
#
# Parameter forwarding, and ensuring future compatibility of your kernels
# -----------------------------------------------------------------------
#
# The functional API that you're hooking into is public and therefore
# **backward** compatible: we guarantee that the parameters of these functionals
# won't be removed or renamed without a proper deprecation cycle. However, we
# don't guarantee **forward** compatibility, and we may add new parameters in
# the future.
#
# Imagine that in a future version, Torchvision adds a new ``inplace`` parameter
# to its :func:`~torchvision.transforms.v2.functional.hflip` functional. If you
# already defined and registered your own kernel as
def hflip_my_tv_tensor(my_dp): # noqa
print("Flipping!")
out = my_dp.flip(-1)
return tv_tensors.wrap(out, like=my_dp)
# %%
# then calling ``F.hflip(my_dp)`` will **fail**, because ``hflip`` will try to
# pass the new ``inplace`` parameter to your kernel, but your kernel doesn't
# accept it.
#
# For this reason, we recommend always defining your kernels with
# ``*args, **kwargs`` in their signature, as done above. This way, your kernel
# will be able to accept any new parameter that we may add in the future.
# (Technically, adding only ``**kwargs`` should be enough.)
"""
===========================
How to use CutMix and MixUp
===========================
.. note::
Try on `Colab <https://colab.research.google.com/github/pytorch/vision/blob/gh-pages/main/_generated_ipynb_notebooks/plot_cutmix_mixup.ipynb>`_
or :ref:`go to the end <sphx_glr_download_auto_examples_transforms_plot_cutmix_mixup.py>` to download the full example code.
:class:`~torchvision.transforms.v2.CutMix` and
:class:`~torchvision.transforms.v2.MixUp` are popular augmentation strategies
that can improve classification accuracy.
These transforms are slightly different from the rest of the Torchvision
transforms, because they expect
**batches** of samples as input, not individual images. In this example we'll
explain how to use them: after the ``DataLoader``, or as part of a collation
function.
"""
# %%
import torch
from torchvision.datasets import FakeData
from torchvision.transforms import v2
NUM_CLASSES = 100
# %%
# Pre-processing pipeline
# -----------------------
#
# We'll use a simple but typical image classification pipeline:
preproc = v2.Compose([
v2.PILToTensor(),
v2.RandomResizedCrop(size=(224, 224), antialias=True),
v2.RandomHorizontalFlip(p=0.5),
v2.ToDtype(torch.float32, scale=True), # to float32 in [0, 1]
v2.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)), # typically from ImageNet
])
dataset = FakeData(size=1000, num_classes=NUM_CLASSES, transform=preproc)
img, label = dataset[0]
print(f"{type(img) = }, {img.dtype = }, {img.shape = }, {label = }")
# %%
#
# One important thing to note is that neither CutMix nor MixUp are part of this
# pre-processing pipeline. We'll add them a bit later once we define the
# DataLoader. Just as a refresher, this is what the DataLoader and training loop
# would look like if we weren't using CutMix or MixUp:
from torch.utils.data import DataLoader
dataloader = DataLoader(dataset, batch_size=4, shuffle=True)
for images, labels in dataloader:
print(f"{images.shape = }, {labels.shape = }")
print(labels.dtype)
# <rest of the training loop here>
break
# %%
# Where to use MixUp and CutMix
# -----------------------------
#
# After the DataLoader
# ^^^^^^^^^^^^^^^^^^^^
#
# Now let's add CutMix and MixUp. The simplest way to do this is right after the
# DataLoader: the DataLoader has already batched the images and labels for us,
# and this is exactly what these transforms expect as input:
dataloader = DataLoader(dataset, batch_size=4, shuffle=True)
cutmix = v2.CutMix(num_classes=NUM_CLASSES)
mixup = v2.MixUp(num_classes=NUM_CLASSES)
cutmix_or_mixup = v2.RandomChoice([cutmix, mixup])
for images, labels in dataloader:
print(f"Before CutMix/MixUp: {images.shape = }, {labels.shape = }")
images, labels = cutmix_or_mixup(images, labels)
print(f"After CutMix/MixUp: {images.shape = }, {labels.shape = }")
# <rest of the training loop here>
break
# %%
#
# Note how the labels were also transformed: we went from a batched label of
# shape (batch_size,) to a tensor of shape (batch_size, num_classes). The
# transformed labels can still be passed as-is to a loss function like
# :func:`torch.nn.functional.cross_entropy`.
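#
# As a quick sanity check (this snippet is not part of the original example),
# the soft labels produced by CutMix/MixUp can be fed to the loss exactly like
# hard labels. The tiny linear model below is just a stand-in:
dummy_model = torch.nn.Sequential(torch.nn.Flatten(), torch.nn.Linear(3 * 224 * 224, NUM_CLASSES))
logits = dummy_model(images)  # images and labels come from the loop above
loss = torch.nn.functional.cross_entropy(logits, labels)  # labels have shape (batch_size, NUM_CLASSES)
print(f"{loss = }")
# %%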
#
# As part of the collation function
# ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
#
# Passing the transforms after the DataLoader is the simplest way to use CutMix
# and MixUp, but one disadvantage is that it does not take advantage of the
# DataLoader multi-processing. For that, we can pass those transforms as part of
# the collation function (refer to the `PyTorch docs
# <https://pytorch.org/docs/stable/data.html#dataloader-collate-fn>`_ to learn
# more about collation).
from torch.utils.data import default_collate
def collate_fn(batch):
return cutmix_or_mixup(*default_collate(batch))
dataloader = DataLoader(dataset, batch_size=4, shuffle=True, num_workers=2, collate_fn=collate_fn)
for images, labels in dataloader:
print(f"{images.shape = }, {labels.shape = }")
# No need to call cutmix_or_mixup, it's already been called as part of the DataLoader!
# <rest of the training loop here>
break
# %%
# Non-standard input format
# -------------------------
#
# So far we've used a typical sample structure where we pass ``(images,
# labels)`` as inputs. MixUp and CutMix will magically work by default with most
# common sample structures: tuples where the second parameter is a tensor label,
# or dicts with a "label[s]" key. Look at the documentation of the
# ``labels_getter`` parameter for more details.
#
# If your samples have a different structure, you can still use CutMix and MixUp
# by passing a callable to the ``labels_getter`` parameter. For example:
batch = {
"imgs": torch.rand(4, 3, 224, 224),
"target": {
"classes": torch.randint(0, NUM_CLASSES, size=(4,)),
"some_other_key": "this is going to be passed-through"
}
}
def labels_getter(batch):
return batch["target"]["classes"]
out = v2.CutMix(num_classes=NUM_CLASSES, labels_getter=labels_getter)(batch)
print(f"{out['imgs'].shape = }, {out['target']['classes'].shape = }")
"""
===============================================================
Transforms v2: End-to-end object detection/segmentation example
===============================================================
.. note::
Try on `Colab <https://colab.research.google.com/github/pytorch/vision/blob/gh-pages/main/_generated_ipynb_notebooks/plot_transforms_e2e.ipynb>`_
or :ref:`go to the end <sphx_glr_download_auto_examples_transforms_plot_transforms_e2e.py>` to download the full example code.
Object detection and segmentation tasks are natively supported:
``torchvision.transforms.v2`` enables jointly transforming images, videos,
bounding boxes, and masks.
This example showcases an end-to-end instance segmentation training case using
Torchvision utils from ``torchvision.datasets``, ``torchvision.models`` and
``torchvision.transforms.v2``. Everything covered here can be applied similarly
to object detection or semantic segmentation tasks.
"""
# %%
import pathlib
import torch
import torch.utils.data
from torchvision import models, datasets, tv_tensors
from torchvision.transforms import v2
torch.manual_seed(0)
# This loads fake data for illustration purposes. In practice, you'll have
# to replace this with the proper data.
# If you're trying to run this on Colab, you can download the assets and the
# helpers from https://github.com/pytorch/vision/tree/main/gallery/
ROOT = pathlib.Path("../assets") / "coco"
IMAGES_PATH = str(ROOT / "images")
ANNOTATIONS_PATH = str(ROOT / "instances.json")
from helpers import plot
# %%
# Dataset preparation
# -------------------
#
# We start off by loading the :class:`~torchvision.datasets.CocoDetection` dataset to have a look at what it currently
# returns.
dataset = datasets.CocoDetection(IMAGES_PATH, ANNOTATIONS_PATH)
sample = dataset[0]
img, target = sample
print(f"{type(img) = }\n{type(target) = }\n{type(target[0]) = }\n{target[0].keys() = }")
# %%
# Torchvision datasets preserve the data structure and types as intended by the
# dataset authors. So by default, the output structure may not always be
# compatible with the models or the transforms.
#
# To overcome that, we can use the
# :func:`~torchvision.datasets.wrap_dataset_for_transforms_v2` function. For
# :class:`~torchvision.datasets.CocoDetection`, this changes the target
# structure to a single dictionary of lists:
dataset = datasets.wrap_dataset_for_transforms_v2(dataset, target_keys=("boxes", "labels", "masks"))
sample = dataset[0]
img, target = sample
print(f"{type(img) = }\n{type(target) = }\n{target.keys() = }")
print(f"{type(target['boxes']) = }\n{type(target['labels']) = }\n{type(target['masks']) = }")
# %%
# We used the ``target_keys`` parameter to specify the kind of output we're
# interested in. Our dataset now returns a target which is a dict where the
# values are :ref:`TVTensors <what_are_tv_tensors>` (all are :class:`torch.Tensor`
# subclasses). We've dropped all unnecessary keys from the previous output, but
# if you need any of the original keys, e.g. "image_id", you can still ask for
# them.
#
# .. note::
#
# If you just want to do detection, you don't need and shouldn't pass
# "masks" in ``target_keys``: if masks are present in the sample, they will
# be transformed, slowing down your transformations unnecessarily.
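#
# For a detection-only pipeline, the wrapping above would for instance become:
#
# .. code-block:: python
#
#     dataset = datasets.wrap_dataset_for_transforms_v2(dataset, target_keys=("boxes", "labels"))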
#
# As baseline, let's have a look at a sample without transformations:
plot([dataset[0], dataset[1]])
# %%
# Transforms
# ----------
#
# Let's now define our pre-processing transforms. All the transforms know how
# to handle images, bounding boxes and masks when relevant.
#
# Transforms are typically passed as the ``transforms`` parameter of the
# dataset so that they can leverage multi-processing from the
# :class:`torch.utils.data.DataLoader`.
transforms = v2.Compose(
[
v2.ToImage(),
v2.RandomPhotometricDistort(p=1),
v2.RandomZoomOut(fill={tv_tensors.Image: (123, 117, 104), "others": 0}),
v2.RandomIoUCrop(),
v2.RandomHorizontalFlip(p=1),
v2.SanitizeBoundingBoxes(),
v2.ToDtype(torch.float32, scale=True),
]
)
dataset = datasets.CocoDetection(IMAGES_PATH, ANNOTATIONS_PATH, transforms=transforms)
dataset = datasets.wrap_dataset_for_transforms_v2(dataset, target_keys=["boxes", "labels", "masks"])
# %%
# A few things are worth noting here:
#
# - We're converting the PIL image into a
# :class:`~torchvision.tv_tensors.Image` object. This isn't strictly
# necessary, but relying on Tensors (here: a Tensor subclass) will
# :ref:`generally be faster <transforms_perf>`.
# - We are calling :class:`~torchvision.transforms.v2.SanitizeBoundingBoxes` to
# make sure we remove degenerate bounding boxes, as well as their
# corresponding labels and masks.
# :class:`~torchvision.transforms.v2.SanitizeBoundingBoxes` should be placed
# at least once at the end of a detection pipeline; it is particularly
# critical if :class:`~torchvision.transforms.v2.RandomIoUCrop` was used.
#
# Let's look at how the sample looks with our augmentation pipeline in place:
# sphinx_gallery_thumbnail_number = 2
plot([dataset[0], dataset[1]])
# %%
# We can see that the colors of the images were distorted, and that the images were zoomed in or out and flipped.
# The bounding boxes and the masks were transformed accordingly. And without any further ado, we can start training.
#
# Data loading and training loop
# ------------------------------
#
# Below we're using Mask R-CNN, which is an instance segmentation model, but
# everything we've covered in this tutorial also applies to object detection and
# semantic segmentation tasks.
data_loader = torch.utils.data.DataLoader(
dataset,
batch_size=2,
# We need a custom collation function here, since the object detection
# models expect a sequence of images and target dictionaries. The default
# collation function tries to torch.stack() the individual elements,
# which fails in general for object detection, because the number of bounding
# boxes varies between the images of the same batch.
collate_fn=lambda batch: tuple(zip(*batch)),
)
model = models.get_model("maskrcnn_resnet50_fpn_v2", weights=None, weights_backbone=None).train()
for imgs, targets in data_loader:
loss_dict = model(imgs, targets)
# Put your training logic here
print(f"{[img.shape for img in imgs] = }")
print(f"{[type(target) for target in targets] = }")
for name, loss_val in loss_dict.items():
print(f"{name:<20}{loss_val:.3f}")
# %%
# Training References
# -------------------
#
# From there, you can check out the `torchvision references
# <https://github.com/pytorch/vision/tree/main/references>`_ where you'll find
# the actual training scripts we use to train our models.
#
# **Disclaimer** The code in our references is more complex than what you'll
# need for your own use-cases: this is because we're supporting different
# backends (PIL, tensors, TVTensors) and different transforms namespaces (v1 and
# v2). So don't be afraid to simplify and only keep what you need.
"""
==================================
Getting started with transforms v2
==================================
.. note::
Try on `Colab <https://colab.research.google.com/github/pytorch/vision/blob/gh-pages/main/_generated_ipynb_notebooks/plot_transforms_getting_started.ipynb>`_
or :ref:`go to the end <sphx_glr_download_auto_examples_transforms_plot_transforms_getting_started.py>` to download the full example code.
This example illustrates all of what you need to know to get started with the
new :mod:`torchvision.transforms.v2` API. We'll cover simple tasks like
image classification, and more advanced ones like object detection /
segmentation.
"""
# %%
# First, a bit of setup
from pathlib import Path
import torch
import matplotlib.pyplot as plt
plt.rcParams["savefig.bbox"] = 'tight'
from torchvision.transforms import v2
from torchvision.io import read_image
torch.manual_seed(1)
# If you're trying to run this on Colab, you can download the assets and the
# helpers from https://github.com/pytorch/vision/tree/main/gallery/
from helpers import plot
img = read_image(str(Path('../assets') / 'astronaut.jpg'))
print(f"{type(img) = }, {img.dtype = }, {img.shape = }")
# %%
# The basics
# ----------
#
# The Torchvision transforms behave like a regular :class:`torch.nn.Module` (in
# fact, most of them are): instantiate a transform, pass an input, get a
# transformed output:
transform = v2.RandomCrop(size=(224, 224))
out = transform(img)
plot([img, out])
# %%
# I just want to do image classification
# --------------------------------------
#
# If you just care about image classification, things are very simple. A basic
# classification pipeline may look like this:
transforms = v2.Compose([
v2.RandomResizedCrop(size=(224, 224), antialias=True),
v2.RandomHorizontalFlip(p=0.5),
v2.ToDtype(torch.float32, scale=True),
v2.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])
out = transforms(img)
plot([img, out])
# %%
# Such a transformation pipeline is typically passed as the ``transform`` argument
# to the :ref:`Datasets <datasets>`, e.g. ``ImageNet(...,
# transform=transforms)``.
#
# That's pretty much all there is. From there, read through our :ref:`main docs
# <transforms>` to learn more about recommended practices and conventions, or
# explore more :ref:`examples <transforms_gallery>` e.g. how to use augmentation
# transforms like :ref:`CutMix and MixUp
# <sphx_glr_auto_examples_transforms_plot_cutmix_mixup.py>`.
#
# .. note::
#
# If you're already relying on the ``torchvision.transforms`` v1 API,
# we recommend to :ref:`switch to the new v2 transforms<v1_or_v2>`. It's
# very easy: the v2 transforms are fully compatible with the v1 API, so you
# only need to change the import!
#
# Detection, Segmentation, Videos
# -------------------------------
#
# The new Torchvision transforms in the ``torchvision.transforms.v2`` namespace
# support tasks beyond image classification: they can also transform bounding
# boxes, segmentation / detection masks, or videos.
#
# Let's briefly look at a detection example with bounding boxes.
from torchvision import tv_tensors  # we'll describe this a bit later, bear with us
boxes = tv_tensors.BoundingBoxes(
[
[15, 10, 370, 510],
[275, 340, 510, 510],
[130, 345, 210, 425]
],
format="XYXY", canvas_size=img.shape[-2:])
transforms = v2.Compose([
v2.RandomResizedCrop(size=(224, 224), antialias=True),
v2.RandomPhotometricDistort(p=1),
v2.RandomHorizontalFlip(p=1),
])
out_img, out_boxes = transforms(img, boxes)
print(type(boxes), type(out_boxes))
plot([(img, boxes), (out_img, out_boxes)])
# %%
#
# The example above focuses on object detection. But if we had masks
# (:class:`torchvision.tv_tensors.Mask`) for object segmentation or semantic
# segmentation, or videos (:class:`torchvision.tv_tensors.Video`), we could have
# passed them to the transforms in exactly the same way.
#
# By now you likely have a few questions: what are these TVTensors, how do we
# use them, and what is the expected input/output of those transforms? We'll
# answer these in the next sections.
# %%
#
# .. _what_are_tv_tensors:
#
# What are TVTensors?
# --------------------
#
# TVTensors are :class:`torch.Tensor` subclasses. The available TVTensors are
# :class:`~torchvision.tv_tensors.Image`,
# :class:`~torchvision.tv_tensors.BoundingBoxes`,
# :class:`~torchvision.tv_tensors.Mask`, and
# :class:`~torchvision.tv_tensors.Video`.
#
# TVTensors look and feel just like regular tensors - they **are** tensors.
# Everything that is supported on a plain :class:`torch.Tensor` like ``.sum()``
# or any ``torch.*`` operator will also work on a TVTensor:
img_dp = tv_tensors.Image(torch.randint(0, 256, (3, 256, 256), dtype=torch.uint8))
print(f"{isinstance(img_dp, torch.Tensor) = }")
print(f"{img_dp.dtype = }, {img_dp.shape = }, {img_dp.sum() = }")
# %%
# These TVTensor classes are at the core of the transforms: in order to
# transform a given input, the transforms first look at the **class** of the
# object, and dispatch to the appropriate implementation accordingly.
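#
# As a quick illustration (this snippet is not part of the original example),
# the same transform behaves differently depending on the class of its input:
# applied to ``img_dp`` it resizes pixels, while applied to the ``boxes``
# defined earlier it rescales the coordinates and updates ``canvas_size``.
resize = v2.Resize(size=(128, 128), antialias=True)
print(f"{resize(img_dp).shape = }")
print(f"{resize(boxes).canvas_size = }")
# %%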
#
# You don't need to know much more about TVTensors at this point, but advanced
# users who want to learn more can refer to
# :ref:`sphx_glr_auto_examples_transforms_plot_tv_tensors.py`.
#
# What do I pass as input?
# ------------------------
#
# Above, we've seen two examples: one where we passed a single image as input
# i.e. ``out = transforms(img)``, and one where we passed both an image and
# bounding boxes, i.e. ``out_img, out_boxes = transforms(img, boxes)``.
#
# In fact, transforms support **arbitrary input structures**. The input can be a
# single image, a tuple, an arbitrarily nested dictionary... pretty much
# anything. The same structure will be returned as output. Below, we use the
# same detection transforms, but pass a tuple (image, target_dict) as input and
# we're getting the same structure as output:
target = {
"boxes": boxes,
"labels": torch.arange(boxes.shape[0]),
"this_is_ignored": ("arbitrary", {"structure": "!"})
}
# Re-using the transforms and definitions from above.
out_img, out_target = transforms(img, target)
# sphinx_gallery_thumbnail_number = 4
plot([(img, target["boxes"]), (out_img, out_target["boxes"])])
print(f"{out_target['this_is_ignored']}")
# %%
# We passed a tuple so we get a tuple back, and the second element is the
# transformed target dict. Transforms don't really care about the structure of
# the input; as mentioned above, they only care about the **type** of the
# objects and transform them accordingly.
#
# *Foreign* objects like strings or ints are simply passed-through. This can be
# useful e.g. if you want to associate a path with every single sample when
# debugging!
#
# .. _passthrough_heuristic:
#
# .. note::
#
# **Disclaimer** This note is slightly advanced and can be safely skipped on
# a first read.
#
# Pure :class:`torch.Tensor` objects are, in general, treated as images (or
# as videos for video-specific transforms). Indeed, you may have noticed
# that in the code above we haven't used the
# :class:`~torchvision.tv_tensors.Image` class at all, and yet our images
# got transformed properly. Transforms follow the following logic to
# determine whether a pure Tensor should be treated as an image (or video),
# or just ignored:
#
# * If there is an :class:`~torchvision.tv_tensors.Image`,
# :class:`~torchvision.tv_tensors.Video`,
# or :class:`PIL.Image.Image` instance in the input, all other pure
# tensors are passed-through.
# * If there is no :class:`~torchvision.tv_tensors.Image` or
# :class:`~torchvision.tv_tensors.Video` instance, only the first pure
# :class:`torch.Tensor` will be transformed as image or video, while all
# others will be passed-through. Here "first" means "first in a depth-wise
# traversal".
#
# This is what happened in the detection example above: the first pure
# tensor was the image so it got transformed properly, and all other pure
# tensor instances like the ``labels`` were passed-through (although labels
# can still be transformed by some transforms like
# :class:`~torchvision.transforms.v2.SanitizeBoundingBoxes`!).
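#
# As a quick illustration of this heuristic (this snippet is not part of the
# original example): with no Image or PIL instance in the input, only the
# first pure tensor is treated as an image, and the second one is passed
# through untouched.
tensor_image = torch.randint(0, 256, (3, 64, 64), dtype=torch.uint8)
other_data = torch.tensor([1, 2, 3])
out_image, out_other = v2.Resize(size=(32, 32), antialias=True)(tensor_image, other_data)
print(f"{out_image.shape = }")
print(f"{out_other = }")  # unchanged
# %%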
#
# .. _transforms_datasets_intercompatibility:
#
# Transforms and Datasets intercompatibility
# ------------------------------------------
#
# Roughly speaking, the output of the datasets must correspond to the input of
# the transforms. How to do that depends on whether you're using the torchvision
# :ref:`built-in datasets <datasets>`, or your own custom datasets.
#
# Using built-in datasets
# ^^^^^^^^^^^^^^^^^^^^^^^
#
# If you're just doing image classification, you don't need to do anything. Just
# use the ``transform`` argument of the dataset e.g. ``ImageNet(...,
# transform=transforms)`` and you're good to go.
#
# Torchvision also supports datasets for object detection or segmentation like
# :class:`torchvision.datasets.CocoDetection`. Those datasets predate
# the existence of the :mod:`torchvision.transforms.v2` module and of the
# TVTensors, so they don't return TVTensors out of the box.
#
# An easy way to force those datasets to return TVTensors and to make them
# compatible with v2 transforms is to use the
# :func:`torchvision.datasets.wrap_dataset_for_transforms_v2` function:
#
# .. code-block:: python
#
# from torchvision.datasets import CocoDetection, wrap_dataset_for_transforms_v2
#
# dataset = CocoDetection(..., transforms=my_transforms)
# dataset = wrap_dataset_for_transforms_v2(dataset)
# # Now the dataset returns TVTensors!
#
# Using your own datasets
# ^^^^^^^^^^^^^^^^^^^^^^^
#
# If you have a custom dataset, then you'll need to convert your objects into
# the appropriate TVTensor classes. Creating TVTensor instances is very easy,
# refer to :ref:`tv_tensor_creation` for more details.
#
# There are two main places where you can implement that conversion logic:
#
# - At the end of the dataset's ``__getitem__`` method, before returning the
# sample (or by sub-classing the dataset).
# - As the very first step of your transforms pipeline
#
# Either way, the logic will depend on your specific dataset.
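# %%
# For illustration only, here is a minimal sketch of the first option: a
# hypothetical dataset whose ``__getitem__`` wraps its raw data into TVTensors
# before applying the transforms. The data below is random and the class name
# is made up.
from torch.utils.data import Dataset


class MyToyDetectionDataset(Dataset):
    def __init__(self, transforms=None):
        self.transforms = transforms

    def __len__(self):
        return 10

    def __getitem__(self, idx):
        # Pretend these were loaded from disk.
        img = tv_tensors.Image(torch.randint(0, 256, (3, 480, 640), dtype=torch.uint8))
        target = {
            "boxes": tv_tensors.BoundingBoxes(
                [[10, 10, 100, 100]], format="XYXY", canvas_size=(480, 640)
            ),
            "labels": torch.tensor([0]),
        }
        if self.transforms is not None:
            img, target = self.transforms(img, target)
        return img, target


toy_img, toy_target = MyToyDetectionDataset(transforms=transforms)[0]
print(f"{type(toy_img) = }, {type(toy_target['boxes']) = }")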
Illustration of transforms
==========================
.. note::
Try on `Colab <https://colab.research.google.com/github/pytorch/vision/blob/gh-pages/main/_generated_ipynb_notebooks/plot_transforms_illustrations.ipynb>`_
or :ref:`go to the end <sphx_glr_download_auto_examples_transforms_plot_transforms_illustrations.py>` to download the full example code.
This example illustrates some of the various transforms available in :ref:`the
torchvision.transforms.v2 module <transforms>`.
"""
# %%
# sphinx_gallery_thumbnail_path = "../../gallery/assets/transforms_thumbnail.png"
from PIL import Image
from pathlib import Path
import matplotlib.pyplot as plt
import numpy as np
import torch
from torchvision.transforms import v2
plt.rcParams["savefig.bbox"] = 'tight'
# if you change the seed, make sure that the randomly-applied transforms
# properly show that the image can be both transformed and *not* transformed!
torch.manual_seed(0)
# If you're trying to run this on Colab, you can download the assets and the
# helpers from https://github.com/pytorch/vision/tree/main/gallery/
from helpers import plot
orig_img = Image.open(Path('../assets') / 'astronaut.jpg')
# %%
# Geometric Transforms
# --------------------
# Geometric image transformation refers to the process of altering the geometric properties of an image,
# such as its shape, size, orientation, or position.
# It involves applying mathematical operations to the image pixels or coordinates to achieve the desired transformation.
#
# Pad
# ~~~
# The :class:`~torchvision.transforms.Pad` transform
# (see also :func:`~torchvision.transforms.functional.pad`)
# pads all image borders with some pixel values.
padded_imgs = [v2.Pad(padding=padding)(orig_img) for padding in (3, 10, 30, 50)]
plot([orig_img] + padded_imgs)
# %%
# Resize
# ~~~~~~
# The :class:`~torchvision.transforms.Resize` transform
# (see also :func:`~torchvision.transforms.functional.resize`)
# resizes an image.
resized_imgs = [v2.Resize(size=size)(orig_img) for size in (30, 50, 100, orig_img.size)]
plot([orig_img] + resized_imgs)
# %%
# CenterCrop
# ~~~~~~~~~~
# The :class:`~torchvision.transforms.CenterCrop` transform
# (see also :func:`~torchvision.transforms.functional.center_crop`)
# crops the given image at the center.
center_crops = [v2.CenterCrop(size=size)(orig_img) for size in (30, 50, 100, orig_img.size)]
plot([orig_img] + center_crops)
# %%
# FiveCrop
# ~~~~~~~~
# The :class:`~torchvision.transforms.FiveCrop` transform
# (see also :func:`~torchvision.transforms.functional.five_crop`)
# crops the given image into four corners and the central crop.
(top_left, top_right, bottom_left, bottom_right, center) = v2.FiveCrop(size=(100, 100))(orig_img)
plot([orig_img] + [top_left, top_right, bottom_left, bottom_right, center])
# %%
# RandomPerspective
# ~~~~~~~~~~~~~~~~~
# The :class:`~torchvision.transforms.RandomPerspective` transform
# (see also :func:`~torchvision.transforms.functional.perspective`)
# performs random perspective transform on an image.
perspective_transformer = v2.RandomPerspective(distortion_scale=0.6, p=1.0)
perspective_imgs = [perspective_transformer(orig_img) for _ in range(4)]
plot([orig_img] + perspective_imgs)
# %%
# RandomRotation
# ~~~~~~~~~~~~~~
# The :class:`~torchvision.transforms.RandomRotation` transform
# (see also :func:`~torchvision.transforms.functional.rotate`)
# rotates an image with random angle.
rotater = v2.RandomRotation(degrees=(0, 180))
rotated_imgs = [rotater(orig_img) for _ in range(4)]
plot([orig_img] + rotated_imgs)
# %%
# RandomAffine
# ~~~~~~~~~~~~
# The :class:`~torchvision.transforms.RandomAffine` transform
# (see also :func:`~torchvision.transforms.functional.affine`)
# performs random affine transform on an image.
affine_transformer = v2.RandomAffine(degrees=(30, 70), translate=(0.1, 0.3), scale=(0.5, 0.75))
affine_imgs = [affine_transformer(orig_img) for _ in range(4)]
plot([orig_img] + affine_imgs)
# %%
# ElasticTransform
# ~~~~~~~~~~~~~~~~
# The :class:`~torchvision.transforms.ElasticTransform` transform
# (see also :func:`~torchvision.transforms.functional.elastic_transform`)
# randomly transforms the morphology of objects in images and produces a
# see-through-water-like effect.
elastic_transformer = v2.ElasticTransform(alpha=250.0)
transformed_imgs = [elastic_transformer(orig_img) for _ in range(2)]
plot([orig_img] + transformed_imgs)
# %%
# RandomCrop
# ~~~~~~~~~~
# The :class:`~torchvision.transforms.RandomCrop` transform
# (see also :func:`~torchvision.transforms.functional.crop`)
# crops an image at a random location.
cropper = v2.RandomCrop(size=(128, 128))
crops = [cropper(orig_img) for _ in range(4)]
plot([orig_img] + crops)
# %%
# RandomResizedCrop
# ~~~~~~~~~~~~~~~~~
# The :class:`~torchvision.transforms.RandomResizedCrop` transform
# (see also :func:`~torchvision.transforms.functional.resized_crop`)
# crops an image at a random location, and then resizes the crop to a given
# size.
resize_cropper = v2.RandomResizedCrop(size=(32, 32))
resized_crops = [resize_cropper(orig_img) for _ in range(4)]
plot([orig_img] + resized_crops)
# %%
# Photometric Transforms
# ----------------------
# Photometric image transformation refers to the process of modifying the photometric properties of an image,
# such as its brightness, contrast, color, or tone.
# These transformations are applied to change the visual appearance of an image
# while preserving its geometric structure.
#
# Except :class:`~torchvision.transforms.Grayscale`, the following transforms are random,
# which means that the same transform
# instance will produce different results each time it transforms a given image.
#
# Grayscale
# ~~~~~~~~~
# The :class:`~torchvision.transforms.Grayscale` transform
# (see also :func:`~torchvision.transforms.functional.to_grayscale`)
# converts an image to grayscale.
gray_img = v2.Grayscale()(orig_img)
plot([orig_img, gray_img], cmap='gray')
# %%
# ColorJitter
# ~~~~~~~~~~~
# The :class:`~torchvision.transforms.ColorJitter` transform
# randomly changes the brightness, contrast, saturation, hue, and other properties of an image.
jitter = v2.ColorJitter(brightness=.5, hue=.3)
jittered_imgs = [jitter(orig_img) for _ in range(4)]
plot([orig_img] + jittered_imgs)
# %%
# GaussianBlur
# ~~~~~~~~~~~~
# The :class:`~torchvision.transforms.GaussianBlur` transform
# (see also :func:`~torchvision.transforms.functional.gaussian_blur`)
# performs gaussian blur transform on an image.
blurrer = v2.GaussianBlur(kernel_size=(5, 9), sigma=(0.1, 5.))
blurred_imgs = [blurrer(orig_img) for _ in range(4)]
plot([orig_img] + blurred_imgs)
# %%
# RandomInvert
# ~~~~~~~~~~~~
# The :class:`~torchvision.transforms.RandomInvert` transform
# (see also :func:`~torchvision.transforms.functional.invert`)
# randomly inverts the colors of the given image.
inverter = v2.RandomInvert()
inverted_imgs = [inverter(orig_img) for _ in range(4)]
plot([orig_img] + inverted_imgs)
# %%
# RandomPosterize
# ~~~~~~~~~~~~~~~
# The :class:`~torchvision.transforms.RandomPosterize` transform
# (see also :func:`~torchvision.transforms.functional.posterize`)
# randomly posterizes the image by reducing the number of bits
# of each color channel.
posterizer = v2.RandomPosterize(bits=2)
posterized_imgs = [posterizer(orig_img) for _ in range(4)]
plot([orig_img] + posterized_imgs)
# %%
# RandomSolarize
# ~~~~~~~~~~~~~~
# The :class:`~torchvision.transforms.RandomSolarize` transform
# (see also :func:`~torchvision.transforms.functional.solarize`)
# randomly solarizes the image by inverting all pixel values above
# the threshold.
solarizer = v2.RandomSolarize(threshold=192.0)
solarized_imgs = [solarizer(orig_img) for _ in range(4)]
plot([orig_img] + solarized_imgs)
# %%
# RandomAdjustSharpness
# ~~~~~~~~~~~~~~~~~~~~~
# The :class:`~torchvision.transforms.RandomAdjustSharpness` transform
# (see also :func:`~torchvision.transforms.functional.adjust_sharpness`)
# randomly adjusts the sharpness of the given image.
sharpness_adjuster = v2.RandomAdjustSharpness(sharpness_factor=2)
sharpened_imgs = [sharpness_adjuster(orig_img) for _ in range(4)]
plot([orig_img] + sharpened_imgs)
# %%
# RandomAutocontrast
# ~~~~~~~~~~~~~~~~~~
# The :class:`~torchvision.transforms.RandomAutocontrast` transform
# (see also :func:`~torchvision.transforms.functional.autocontrast`)
# randomly applies autocontrast to the given image.
autocontraster = v2.RandomAutocontrast()
autocontrasted_imgs = [autocontraster(orig_img) for _ in range(4)]
plot([orig_img] + autocontrasted_imgs)
# %%
# RandomEqualize
# ~~~~~~~~~~~~~~
# The :class:`~torchvision.transforms.RandomEqualize` transform
# (see also :func:`~torchvision.transforms.functional.equalize`)
# randomly equalizes the histogram of the given image.
equalizer = v2.RandomEqualize()
equalized_imgs = [equalizer(orig_img) for _ in range(4)]
plot([orig_img] + equalized_imgs)
# %%
# Augmentation Transforms
# -----------------------
# The following transforms are combinations of multiple transforms,
# either geometric or photometric, or both.
#
# AutoAugment
# ~~~~~~~~~~~
# The :class:`~torchvision.transforms.AutoAugment` transform
# automatically augments data based on a given auto-augmentation policy.
# See :class:`~torchvision.transforms.AutoAugmentPolicy` for the available policies.
policies = [v2.AutoAugmentPolicy.CIFAR10, v2.AutoAugmentPolicy.IMAGENET, v2.AutoAugmentPolicy.SVHN]
augmenters = [v2.AutoAugment(policy) for policy in policies]
imgs = [
    [augmenter(orig_img) for _ in range(4)]
    for augmenter in augmenters
]
row_title = [str(policy).split('.')[-1] for policy in policies]
plot([[orig_img] + row for row in imgs], row_title=row_title)
# %%
# RandAugment
# ~~~~~~~~~~~
# The :class:`~torchvision.transforms.RandAugment` transform is an alternate version of AutoAugment.
augmenter = v2.RandAugment()
imgs = [augmenter(orig_img) for _ in range(4)]
plot([orig_img] + imgs)
# %%
# TrivialAugmentWide
# ~~~~~~~~~~~~~~~~~~
# The :class:`~torchvision.transforms.TrivialAugmentWide` transform is an alternate implementation of AutoAugment.
# However, instead of transforming an image multiple times, it transforms an image only once
# using a random transform from a given list with a random strength number.
augmenter = v2.TrivialAugmentWide()
imgs = [augmenter(orig_img) for _ in range(4)]
plot([orig_img] + imgs)
# %%
# AugMix
# ~~~~~~
# The :class:`~torchvision.transforms.AugMix` transform interpolates between augmented versions of an image.
augmenter = v2.AugMix()
imgs = [augmenter(orig_img) for _ in range(4)]
plot([orig_img] + imgs)
# %%
# Randomly-applied Transforms
# ---------------------------
#
# The following transforms are randomly-applied given a probability ``p``. That is, given ``p = 0.5``,
# there is a 50% chance to return the original image, and a 50% chance to return the transformed image,
# even when called with the same transform instance!
#
# RandomHorizontalFlip
# ~~~~~~~~~~~~~~~~~~~~
# The :class:`~torchvision.transforms.RandomHorizontalFlip` transform
# (see also :func:`~torchvision.transforms.functional.hflip`)
# performs horizontal flip of an image, with a given probability.
hflipper = v2.RandomHorizontalFlip(p=0.5)
transformed_imgs = [hflipper(orig_img) for _ in range(4)]
plot([orig_img] + transformed_imgs)
# %%
# RandomVerticalFlip
# ~~~~~~~~~~~~~~~~~~
# The :class:`~torchvision.transforms.RandomVerticalFlip` transform
# (see also :func:`~torchvision.transforms.functional.vflip`)
# performs vertical flip of an image, with a given probability.
vflipper = v2.RandomVerticalFlip(p=0.5)
transformed_imgs = [vflipper(orig_img) for _ in range(4)]
plot([orig_img] + transformed_imgs)
# %%
# RandomApply
# ~~~~~~~~~~~
# The :class:`~torchvision.transforms.RandomApply` transform
# randomly applies a list of transforms, with a given probability.
applier = v2.RandomApply(transforms=[v2.RandomCrop(size=(64, 64))], p=0.5)
transformed_imgs = [applier(orig_img) for _ in range(4)]
plot([orig_img] + transformed_imgs)
"""
=============
TVTensors FAQ
=============
.. note::
Try on `Colab <https://colab.research.google.com/github/pytorch/vision/blob/gh-pages/main/_generated_ipynb_notebooks/plot_tv_tensors.ipynb>`_
or :ref:`go to the end <sphx_glr_download_auto_examples_transforms_plot_tv_tensors.py>` to download the full example code.
TVTensors are Tensor subclasses introduced together with
``torchvision.transforms.v2``. This example showcases what these TVTensors are
and how they behave.
.. warning::
**Intended Audience** Unless you're writing your own transforms or your own TVTensors, you
probably do not need to read this guide. This is a fairly low-level topic
that most users will not need to worry about: you do not need to understand
the internals of TVTensors to efficiently rely on
``torchvision.transforms.v2``. It may however be useful for advanced users
trying to implement their own datasets or transforms, or to work directly with
the TVTensors.
"""
# %%
import PIL.Image
import torch
from torchvision import tv_tensors
# %%
# What are TVTensors?
# -------------------
#
# TVTensors are zero-copy tensor subclasses:
tensor = torch.rand(3, 256, 256)
image = tv_tensors.Image(tensor)
assert isinstance(image, torch.Tensor)
assert image.data_ptr() == tensor.data_ptr()
# %%
# Under the hood, they are needed in :mod:`torchvision.transforms.v2` to correctly dispatch to the appropriate function
# for the input data.
#
# :mod:`torchvision.tv_tensors` supports four types of TVTensors:
#
# * :class:`~torchvision.tv_tensors.Image`
# * :class:`~torchvision.tv_tensors.Video`
# * :class:`~torchvision.tv_tensors.BoundingBoxes`
# * :class:`~torchvision.tv_tensors.Mask`
#
# What can I do with a TVTensor?
# ------------------------------
#
# TVTensors look and feel just like regular tensors - they **are** tensors.
# Everything that is supported on a plain :class:`torch.Tensor` like ``.sum()`` or
# any ``torch.*`` operator will also work on TVTensors. See
# :ref:`tv_tensor_unwrapping_behaviour` for a few gotchas.
# %%
# .. _tv_tensor_creation:
#
# How do I construct a TVTensor?
# ------------------------------
#
# Using the constructor
# ^^^^^^^^^^^^^^^^^^^^^
#
# Each TVTensor class takes any tensor-like data that can be turned into a :class:`~torch.Tensor`
image = tv_tensors.Image([[[[0, 1], [1, 0]]]])
print(image)
# %%
# Similar to other PyTorch creation ops, the constructor also takes the ``dtype``, ``device``, and ``requires_grad``
# parameters.
float_image = tv_tensors.Image([[[0, 1], [1, 0]]], dtype=torch.float32, requires_grad=True)
print(float_image)
# %%
# In addition, :class:`~torchvision.tv_tensors.Image` and :class:`~torchvision.tv_tensors.Mask` can also take a
# :class:`PIL.Image.Image` directly:
image = tv_tensors.Image(PIL.Image.open("../assets/astronaut.jpg"))
print(image.shape, image.dtype)
# %%
# Some TVTensors require additional metadata to be passed in order to be constructed. For example,
# :class:`~torchvision.tv_tensors.BoundingBoxes` requires the coordinate format as well as the size of the
# corresponding image (``canvas_size``) alongside the actual values. These
# metadata are required to properly transform the bounding boxes.
bboxes = tv_tensors.BoundingBoxes(
[[17, 16, 344, 495], [0, 10, 0, 10]],
format=tv_tensors.BoundingBoxFormat.XYXY,
canvas_size=image.shape[-2:]
)
print(bboxes)
# %%
# Using ``tv_tensors.wrap()``
# ^^^^^^^^^^^^^^^^^^^^^^^^^^^
#
# You can also use the :func:`~torchvision.tv_tensors.wrap` function to wrap a tensor object
# into a TVTensor. This is useful when you already have an object of the
# desired type, which typically happens when writing transforms: you just want
# to wrap the output like the input.
new_bboxes = torch.tensor([0, 20, 30, 40])
new_bboxes = tv_tensors.wrap(new_bboxes, like=bboxes)
assert isinstance(new_bboxes, tv_tensors.BoundingBoxes)
assert new_bboxes.canvas_size == bboxes.canvas_size
# %%
# The metadata of ``new_bboxes`` is the same as ``bboxes``, but you could pass
# it as a parameter to override it.
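#
# For instance (a quick sketch, not part of the original example), the metadata
# can be overridden when wrapping:
smaller_canvas = tv_tensors.wrap(new_bboxes, like=bboxes, canvas_size=(100, 100))
print(f"{smaller_canvas.canvas_size = }")
# %%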
#
# .. _tv_tensor_unwrapping_behaviour:
#
# I had a TVTensor but now I have a Tensor. Help!
# -----------------------------------------------
#
# By default, operations on :class:`~torchvision.tv_tensors.TVTensor` objects
# will return a pure Tensor:
assert isinstance(bboxes, tv_tensors.BoundingBoxes)
# Shift bboxes by 3 pixels in both H and W
new_bboxes = bboxes + 3
assert isinstance(new_bboxes, torch.Tensor)
assert not isinstance(new_bboxes, tv_tensors.BoundingBoxes)
# %%
# .. note::
#
# This behavior only affects native ``torch`` operations. If you are using
# the built-in ``torchvision`` transforms or functionals, you will always get
# as output the same type that you passed as input (pure ``Tensor`` or
# ``TVTensor``).
# %%
# But I want a TVTensor back!
# ^^^^^^^^^^^^^^^^^^^^^^^^^^^
#
# You can re-wrap a pure tensor into a TVTensor by just calling the TVTensor
# constructor, or by using the :func:`~torchvision.tv_tensors.wrap` function
# (see more details above in :ref:`tv_tensor_creation`):
new_bboxes = bboxes + 3
new_bboxes = tv_tensors.wrap(new_bboxes, like=bboxes)
assert isinstance(new_bboxes, tv_tensors.BoundingBoxes)
# %%
# Alternatively, you can use the :func:`~torchvision.tv_tensors.set_return_type`
# as a global config setting for the whole program, or as a context manager
# (read its docs to learn more about caveats):
with tv_tensors.set_return_type("TVTensor"):
new_bboxes = bboxes + 3
assert isinstance(new_bboxes, tv_tensors.BoundingBoxes)
# %%
# Why is this happening?
# ^^^^^^^^^^^^^^^^^^^^^^
#
# **For performance reasons**. :class:`~torchvision.tv_tensors.TVTensor`
# classes are Tensor subclasses, so any operation involving a
# :class:`~torchvision.tv_tensors.TVTensor` object will go through the
# `__torch_function__
# <https://pytorch.org/docs/stable/notes/extending.html#extending-torch>`_
# protocol. This induces a small overhead, which we want to avoid when possible.
# This doesn't matter for built-in ``torchvision`` transforms because we can
# avoid the overhead there, but it could be a problem in your model's
# ``forward``.
#
# **The alternative isn't much better anyway.** For every operation where
# preserving the :class:`~torchvision.tv_tensors.TVTensor` type makes
# sense, there are just as many operations where returning a pure Tensor is
# preferable: for example, is ``img.sum()`` still an :class:`~torchvision.tv_tensors.Image`?
# If we were to preserve :class:`~torchvision.tv_tensors.TVTensor` types all
# the way, even the model's logits or the output of the loss function would end up
# being of type :class:`~torchvision.tv_tensors.Image`, and surely that's not
# desirable.
#
# .. note::
#
# This behaviour is something we're actively seeking feedback on. If you find this surprising or if you
# have any suggestions on how to better support your use-cases, please reach out to us via this issue:
# https://github.com/pytorch/vision/issues/7319
#
# Exceptions
# ^^^^^^^^^^
#
# There are a few exceptions to this "unwrapping" rule:
# :meth:`~torch.Tensor.clone`, :meth:`~torch.Tensor.to`,
# :meth:`torch.Tensor.detach`, and :meth:`~torch.Tensor.requires_grad_` retain
# the TVTensor type.
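# As a quick illustration of the rule above:

assert isinstance(bboxes.clone(), tv_tensors.BoundingBoxes)
assert isinstance(bboxes.to(torch.float64), tv_tensors.BoundingBoxes)

# %%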
#
# Inplace operations on TVTensors like ``obj.add_()`` will preserve the type of
# ``obj``. However, the **returned** value of inplace operations will be a pure
# tensor:
image = tv_tensors.Image([[[0, 1], [1, 0]]])
new_image = image.add_(1).mul_(2)
# image got transformed in-place and is still a TVTensor Image, but new_image
# is a Tensor. They share the same underlying data and they're equal, just
# different classes.
assert isinstance(image, tv_tensors.Image)
print(image)
assert isinstance(new_image, torch.Tensor) and not isinstance(new_image, tv_tensors.Image)
assert (new_image == image).all()
assert new_image.data_ptr() == image.data_ptr()
......@@ -20,6 +20,7 @@ from torchvision.models.efficientnet import (
)
from torchvision.models.googlenet import googlenet
from torchvision.models.inception import inception_v3
from torchvision.models.maxvit import maxvit_t
from torchvision.models.mnasnet import mnasnet0_5, mnasnet0_75, mnasnet1_0, mnasnet1_3
from torchvision.models.mobilenetv2 import mobilenet_v2
from torchvision.models.mobilenetv3 import mobilenet_v3_large, mobilenet_v3_small
......@@ -68,6 +69,17 @@ from torchvision.models.shufflenetv2 import (
shufflenet_v2_x2_0,
)
from torchvision.models.squeezenet import squeezenet1_0, squeezenet1_1
from torchvision.models.swin_transformer import swin_b, swin_s, swin_t
from torchvision.models.swin_transformer import swin_b, swin_s, swin_t, swin_v2_b, swin_v2_s, swin_v2_t
from torchvision.models.vgg import vgg11, vgg11_bn, vgg13, vgg13_bn, vgg16, vgg16_bn, vgg19, vgg19_bn
from torchvision.models.video import (
mc3_18,
mvit_v1_b,
mvit_v2_s,
r2plus1d_18,
r3d_18,
s3d,
swin3d_b,
swin3d_s,
swin3d_t,
)
from torchvision.models.vision_transformer import vit_b_16, vit_b_32, vit_h_14, vit_l_16, vit_l_32
cmake_minimum_required(VERSION 3.4.1)
set(TARGET torchvision_ops)
project(${TARGET} CXX)
set(CMAKE_CXX_STANDARD 14)
set(CMAKE_CXX_STANDARD 17)
set(LIBTORCH_HEADER_ROOT ${LIBTORCH_HEADER_ROOT})
set(LIBRARY_OUTPUT_PATH ../lib)
......
pytorch_version = '1.12.0'
pytorch_version = '2.0.0'
Pod::Spec.new do |s|
s.name = 'LibTorchvision'
s.version = '0.13.0'
s.version = '0.15.1'
s.authors = 'PyTorch Team'
s.license = { :type => 'BSD' }
s.homepage = 'https://github.com/pytorch/vision'
......
## Status
The iOS demo of TorchVision is currently unmaintained, untested and likely out-of-date.
## Torchvision maintainers guide
This document describes the user-facing policies / principles used when
developing and maintaining torchvision. Other maintainer info (e.g. the release
process) can be found in the meta-internal wiki.
### What is public and what is private?
For the Python API, torchvision largely follows the [PyTorch
policy](https://github.com/pytorch/pytorch/wiki/Public-API-definition-and-documentation)
which is consistent with other major packages
([numpy](https://numpy.org/neps/nep-0023-backwards-compatibility.html),
[scikit-learn](https://scikit-learn.org/dev/glossary.html#term-API) etc.).
We recognize that this policy is somewhat imperfect for some edge cases, and that
it's difficult to come up with an accurate technical definition. In broad terms,
which are usually well understood by users, the policy is that:
- modules that can be accessed without leading underscore are public
- objects in a public file that don't have a leading underscore are public
- class attributes are public iff they have no leading underscore
- the rest of the modules / objects / class attributes are considered private
The public API has backward-compatible (BC) guarantees defined in our
deprecation policy (see below). The private API has no BC guarantees.
For C++, code is private. For Meta employees: if a C++ change breaks fbcode, fix
fbcode or revert the change. We should be careful about models running in
production and relying on torchvision ops.
The `test` folder is not importable and is **private.** Even meta-internal
projects should *not* rely on it (it has happened in the past and is now
programmatically impossible).
The training references do not have BC guarantees. Breaking changes are
possible, but we should make sure that the tutorials are still running properly,
and that their intended narrative is preserved (by e.g. checking outputs,
etc.).
The rest of the folders (build, android, ios, etc.) are private and have no BC
guarantees.
### Deprecation policy.
Because they're disruptive, **deprecations should only be used sparingly**.
We largely follow the [PyTorch
policy](https://github.com/pytorch/pytorch/wiki/PyTorch's-Python-Frontend-Backward-and-Forward-Compatibility-Policy):
breaking changes require a deprecation period of at least 2 versions.
Deprecations should clearly indicate their deadline in the docs and warning
messages. Avoid leaving the deadline open-ended or keeping deprecated APIs around for too
long: it gives users no incentive to update their code, sends conflicting
messages ("why was this API removed while this other one is still around?"), and
accumulates debt in the project.
### Should this attribute be public? Should this function be private?
When designing an API it’s not always obvious what should be exposed as public,
and what should be kept as a private implementation detail. The following
guidelines can be useful:
* Functional consistency throughout the library is a top priority, for users’ and
developers’ sake. When in doubt, and unless it’s clearly wrong, expose what other
similar classes expose.
* Think really hard about the users and their use-cases, and try to expose what
they would need to address those use-cases. Aggressively keep everything else
private. Remember that the “private -> public” direction is way smoother than
the “public -> private” one: when in doubt, keep it private.
* When thinking about use-cases, the general API motto applies: make what’s
simple and common easy, and make what’s complex possible (80% / 20% rule).
There might be a ~1% left that’s not addressed: that’s OK. Also, **make what’s
wrong very hard**, if not impossible.
As a good practice, always create new files and even classes with a leading
underscore in their name. This way, everything is private by default and the
only public surface is explicitly present in an `__init__.py` file.
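For illustration, here is a minimal sketch of that layout (the module and class
names below are hypothetical, not actual torchvision code): the implementation
lives in an underscore-prefixed file, and the package's `__init__.py` re-exports
only the intended public names.

```python
# torchvision/some_area/_frobnicate.py -- private module (leading underscore)
class Frobnicator:
    """Public class: users import it from the package, never from this module."""

    def __call__(self, x):
        return x


class _FrobnicateHelper:
    """Private helper: never re-exported, free to change at any time."""


# torchvision/some_area/__init__.py -- the only public surface:
#
#     from ._frobnicate import Frobnicator
#     __all__ = ["Frobnicator"]
```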
......@@ -7,6 +7,38 @@ allow_redefinition = True
no_implicit_optional = True
warn_redundant_casts = True
[mypy-torchvision.prototype.datapoints.*]
; untyped definitions and calls
disallow_untyped_defs = True
; None and Optional handling
no_implicit_optional = True
; warnings
warn_unused_ignores = True
; miscellaneous strictness flags
allow_redefinition = True
[mypy-torchvision.prototype.transforms.*]
; untyped definitions and calls
disallow_untyped_defs = True
; None and Optional handling
no_implicit_optional = True
; warnings
warn_unused_ignores = True
; miscellaneous strictness flags
allow_redefinition = True
[mypy-torchvision.prototype.datasets.*]
ignore_errors = True
[mypy-torchvision.io.image.*]
ignore_errors = True
......
# Building torchvision packages for release
TorchVision release packages are built using `build_wheel.sh` and `build_conda.sh` for all permutations of
supported operating systems, compute platforms and Python versions.
The OS/Python/Compute matrix is defined in https://github.com/pytorch/vision/blob/main/.circleci/regenerate.py
#!/bin/bash
set -ex
PARALLELISM=8
if [ -n "$MAX_JOBS" ]; then
PARALLELISM=$MAX_JOBS
fi
if [[ "$(uname)" != Darwin && "$OSTYPE" != "msys" ]]; then
eval "$(./conda/bin/conda shell.bash hook)"
conda activate ./env
fi
script_dir="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"
. "$script_dir/pkg_helpers.bash"
export BUILD_TYPE=conda
setup_env
export SOURCE_ROOT_DIR="$PWD"
setup_conda_pytorch_constraint
setup_conda_cudatoolkit_plain_constraint
if [[ "$OSTYPE" == "msys" ]]; then
conda install -yq conda-build cmake future
pip install dataclasses
fi
setup_visual_studio_constraint
setup_junit_results_folder
if [[ "$(uname)" == Darwin ]]; then
# TODO: this can be removed as soon as mkl's CMake support works with clang
# see https://github.com/pytorch/vision/pull/4203 for details
MKL_CONSTRAINT='mkl==2021.2.0'
else
MKL_CONSTRAINT=''
fi
if [[ $CONDA_BUILD_VARIANT == "cpu" ]]; then
PYTORCH_MUTEX_CONSTRAINT='pytorch-mutex=1.0=cpu'
else
PYTORCH_MUTEX_CONSTRAINT=''
fi
conda install -yq pytorch=$PYTORCH_VERSION $CONDA_CUDATOOLKIT_CONSTRAINT $PYTORCH_MUTEX_CONSTRAINT $MKL_CONSTRAINT numpy -c nvidia -c "pytorch-${UPLOAD_CHANNEL}"
TORCH_PATH=$(dirname $(python -c "import torch; print(torch.__file__)"))
if [[ "$(uname)" == Darwin || "$OSTYPE" == "msys" ]]; then
conda install -yq libpng jpeg
else
yum install -y libpng-devel libjpeg-turbo-devel
fi
if [[ "$OSTYPE" == "msys" ]]; then
source .circleci/unittest/windows/scripts/set_cuda_envs.sh
fi
mkdir cpp_build
pushd cpp_build
# Generate libtorchvision files
cmake .. -DTorch_DIR=$TORCH_PATH/share/cmake/Torch -DWITH_CUDA=$CMAKE_USE_CUDA
# Compile and install libtorchvision
if [[ "$OSTYPE" == "msys" ]]; then
"$script_dir/windows/internal/vc_env_helper.bat" "$script_dir/windows/internal/build_cmake.bat" $PARALLELISM
CONDA_PATH=$(dirname $(which python))
cp -r "C:/Program Files (x86)/torchvision/include/torchvision" $CONDA_PATH/include
else
make -j$PARALLELISM
make install
if [[ "$(uname)" == Darwin ]]; then
CONDA_PATH=$(dirname $(dirname $(which python)))
cp -r /usr/local/include/torchvision $CONDA_PATH/include/
export C_INCLUDE_PATH=/usr/local/include
export CPLUS_INCLUDE_PATH=/usr/local/include
fi
fi
popd
# Install torchvision locally
python setup.py develop
# Trace, compile and run project that uses Faster-RCNN
pushd test/tracing/frcnn
mkdir build
# Trace model
python trace_model.py
cp fasterrcnn_resnet50_fpn.pt build
cd build
cmake .. -DTorch_DIR=$TORCH_PATH/share/cmake/Torch -DWITH_CUDA=$CMAKE_USE_CUDA
if [[ "$OSTYPE" == "msys" ]]; then
"$script_dir/windows/internal/vc_env_helper.bat" "$script_dir/windows/internal/build_frcnn.bat" $PARALLELISM
mv fasterrcnn_resnet50_fpn.pt Release
cd Release
export PATH=$(cygpath -w "C:/Program Files/NVIDIA Corporation/NvToolsExt/bin/x64"):$(cygpath -w "C:/Program Files (x86)/torchvision/bin"):$(cygpath -w $TORCH_PATH)/lib:$PATH
else
make -j$PARALLELISM
fi
# Run traced program
./test_frcnn_tracing
# Compile and run the CPP example
popd
cd examples/cpp/hello_world
mkdir build
# Trace model
python trace_model.py
cp resnet18.pt build
cd build
cmake .. -DTorch_DIR=$TORCH_PATH/share/cmake/Torch
if [[ "$OSTYPE" == "msys" ]]; then
"$script_dir/windows/internal/vc_env_helper.bat" "$script_dir/windows/internal/build_cpp_example.bat" $PARALLELISM
mv resnet18.pt Release
cd Release
else
make -j$PARALLELISM
fi
# Run CPP example
./hello-world
#!/bin/bash
set -ex
script_dir="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"
. "$script_dir/pkg_helpers.bash"
export BUILD_TYPE=conda
setup_env
export SOURCE_ROOT_DIR="$PWD"
setup_conda_pytorch_constraint
setup_conda_cudatoolkit_constraint
setup_visual_studio_constraint
setup_junit_results_folder
export CUDATOOLKIT_CHANNEL="nvidia"
conda build -c $CUDATOOLKIT_CHANNEL $CONDA_CHANNEL_FLAGS --no-anaconda-upload --python "$PYTHON_VERSION" packaging/torchvision
#!/bin/bash
set -ex
script_dir="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"
. "$script_dir/pkg_helpers.bash"
export BUILD_TYPE=wheel
setup_env
setup_wheel_python
pip_install numpy pyyaml future ninja
pip_install --upgrade setuptools
setup_pip_pytorch_version
python setup.py clean
# Copy binaries to be included in the wheel distribution
if [[ "$(uname)" == Darwin || "$OSTYPE" == "msys" ]]; then
python_exec="$(which python)"
bin_path=$(dirname $python_exec)
env_path=$(dirname $bin_path)
if [[ "$(uname)" == Darwin ]]; then
# Install delocate to relocate the required binaries
pip_install "delocate>=0.9"
else
cp "$bin_path/Library/bin/libpng16.dll" torchvision
cp "$bin_path/Library/bin/libjpeg.dll" torchvision
fi
else
# Install auditwheel to get some inspection utilities
pip_install auditwheel
# Point to custom libraries
export LD_LIBRARY_PATH=$(pwd)/ext_libraries/lib:$LD_LIBRARY_PATH
export TORCHVISION_INCLUDE=$(pwd)/ext_libraries/include
export TORCHVISION_LIBRARY=$(pwd)/ext_libraries/lib
fi
download_copy_ffmpeg
if [[ "$OSTYPE" == "msys" ]]; then
IS_WHEEL=1 "$script_dir/windows/internal/vc_env_helper.bat" python setup.py bdist_wheel
else
IS_WHEEL=1 python setup.py bdist_wheel
fi
if [[ "$(uname)" == Darwin ]]; then
pushd dist/
python_exec="$(which python)"
bin_path=$(dirname $python_exec)
env_path=$(dirname $bin_path)
for whl in *.whl; do
DYLD_FALLBACK_LIBRARY_PATH="$env_path/lib/:$DYLD_FALLBACK_LIBRARY_PATH" delocate-wheel -v --ignore-missing-dependencies $whl
done
else
if [[ "$OSTYPE" == "msys" ]]; then
"$script_dir/windows/internal/vc_env_helper.bat" python $script_dir/wheel/relocate.py
else
LD_LIBRARY_PATH="/usr/local/lib:$CUDA_HOME/lib64:$LD_LIBRARY_PATH" python $script_dir/wheel/relocate.py
fi
fi
# A set of useful bash functions for common functionality we need to do in
# many build scripts
# Setup CUDA environment variables, based on CU_VERSION
#
# Inputs:
# CU_VERSION (e.g. cpu, cu116, cu117)
# NO_CUDA_PACKAGE (bool)
# BUILD_TYPE (conda, wheel)
#
# Outputs:
# VERSION_SUFFIX (e.g., "")
# PYTORCH_VERSION_SUFFIX (e.g., +cpu)
# WHEEL_DIR (e.g., cu100/)
# CUDA_HOME (e.g., /usr/local/cuda-9.2, respected by torch.utils.cpp_extension)
# FORCE_CUDA (respected by torchvision setup.py)
# NVCC_FLAGS (respected by torchvision setup.py)
#
# Precondition: CUDA versions are installed in their conventional locations in
# /usr/local/cuda-*
#
# NOTE: Why VERSION_SUFFIX versus PYTORCH_VERSION_SUFFIX? If you're building
# a package with CUDA on a platform we support CUDA on, VERSION_SUFFIX ==
# PYTORCH_VERSION_SUFFIX and everyone is happy. However, if you are building a
# package with only CPU bits (e.g., torchaudio), then VERSION_SUFFIX is always
# empty, but PYTORCH_VERSION_SUFFIX is +cpu (because that's how you get a CPU
# version of a Python package). But that doesn't apply if you're on OS X,
# since the default CU_VERSION on OS X is cpu.
setup_cuda() {
# First, compute version suffixes. By default, assume no version suffixes
export VERSION_SUFFIX=""
export PYTORCH_VERSION_SUFFIX=""
export WHEEL_DIR=""
# Wheel builds need suffixes (but not if they're on OS X, which never has suffix)
if [[ "$BUILD_TYPE" == "wheel" ]] && [[ "$(uname)" != Darwin ]]; then
export PYTORCH_VERSION_SUFFIX="+$CU_VERSION"
# Match the suffix scheme of pytorch, unless this package does not have
# CUDA builds (in which case, use default)
if [[ -z "$NO_CUDA_PACKAGE" ]]; then
export VERSION_SUFFIX="$PYTORCH_VERSION_SUFFIX"
export WHEEL_DIR="$CU_VERSION/"
fi
fi
# Now work out the CUDA settings
case "$CU_VERSION" in
cu117)
if [[ "$OSTYPE" == "msys" ]]; then
export CUDA_HOME="C:\\Program Files\\NVIDIA GPU Computing Toolkit\\CUDA\\v11.7"
else
export CUDA_HOME=/usr/local/cuda-11.7/
fi
export TORCH_CUDA_ARCH_LIST="3.5;5.0+PTX;6.0;7.0;7.5;8.0;8.6"
;;
cu116)
if [[ "$OSTYPE" == "msys" ]]; then
export CUDA_HOME="C:\\Program Files\\NVIDIA GPU Computing Toolkit\\CUDA\\v11.6"
else
export CUDA_HOME=/usr/local/cuda-11.6/
fi
export TORCH_CUDA_ARCH_LIST="3.5;5.0+PTX;6.0;7.0;7.5;8.0;8.6"
;;
cpu)
;;
rocm*)
export FORCE_CUDA=1
;;
*)
echo "Unrecognized CU_VERSION=$CU_VERSION"
exit 1
;;
esac
if [[ -n "$CUDA_HOME" ]]; then
# Adds nvcc binary to the search path so that CMake's `find_package(CUDA)` will pick the right one
export PATH="$CUDA_HOME/bin:$PATH"
export FORCE_CUDA=1
fi
}
# Populate build version if necessary, and add version suffix
#
# Inputs:
# BUILD_VERSION (e.g., 0.2.0 or empty)
# VERSION_SUFFIX (e.g., +cpu)
#
# Outputs:
# BUILD_VERSION (e.g., 0.2.0.dev20190807+cpu)
#
# Fill BUILD_VERSION if it doesn't exist already with a nightly string
# Usage: setup_build_version 0.2.0
setup_build_version() {
if [[ -z "$BUILD_VERSION" ]]; then
if [[ -z "$1" ]]; then
setup_base_build_version
else
BUILD_VERSION="$1"
fi
BUILD_VERSION="$BUILD_VERSION.dev$(date "+%Y%m%d")$VERSION_SUFFIX"
else
BUILD_VERSION="$BUILD_VERSION$VERSION_SUFFIX"
fi
# Set build version based on tag if on tag
if [[ -n "${CIRCLE_TAG}" ]]; then
# Strip tag
BUILD_VERSION="$(echo "${CIRCLE_TAG}" | sed -e 's/^v//' -e 's/-.*$//')${VERSION_SUFFIX}"
fi
export BUILD_VERSION
}
setup_base_build_version() {
SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"
# version.txt for some reason has an `a` character after major.minor.rev;
# the command below yields 0.10.0 from a version.txt containing 0.10.0a0
BUILD_VERSION=$( cut -f 1 -d a "$SCRIPT_DIR/../version.txt" )
export BUILD_VERSION
}
# Set some useful variables for OS X, if applicable
setup_macos() {
if [[ "$(uname)" == Darwin ]]; then
export MACOSX_DEPLOYMENT_TARGET=10.9 CC=clang CXX=clang++
fi
}
# Top-level entry point for things every package will need to do
#
# Usage: setup_env 0.2.0
setup_env() {
setup_cuda
setup_build_version "$1"
setup_macos
}
# Function to retry functions that sometimes timeout or have flaky failures
retry () {
$* || (sleep 1 && $*) || (sleep 2 && $*) || (sleep 4 && $*) || (sleep 8 && $*)
}
# Inputs:
# PYTHON_VERSION (3.7, 3.8, 3.9, 3.10)
# UNICODE_ABI (bool)
#
# Outputs:
# PATH modified to put correct Python version in PATH
#
# Precondition: If Linux, you are in a soumith/manylinux-cuda* Docker image
setup_wheel_python() {
if [[ "$(uname)" == Darwin || "$OSTYPE" == "msys" ]]; then
eval "$(conda shell.bash hook)"
conda env remove -n "env$PYTHON_VERSION" || true
conda create ${CONDA_CHANNEL_FLAGS} -yn "env$PYTHON_VERSION" python="$PYTHON_VERSION"
conda activate "env$PYTHON_VERSION"
# Install libpng from Anaconda (defaults)
conda install ${CONDA_CHANNEL_FLAGS} libpng "jpeg<=9b" -y
else
# Install native CentOS libJPEG, freetype and GnuTLS
yum install -y libjpeg-turbo-devel freetype gnutls
case "$PYTHON_VERSION" in
3.7) python_abi=cp37-cp37m ;;
3.8) python_abi=cp38-cp38 ;;
3.9) python_abi=cp39-cp39 ;;
3.10) python_abi=cp310-cp310 ;;
*)
echo "Unrecognized PYTHON_VERSION=$PYTHON_VERSION"
exit 1
;;
esac
# Download all the dependencies required to compile image and video_reader
# extensions
mkdir -p ext_libraries
pushd ext_libraries
popd
export PATH="/opt/python/$python_abi/bin:$(pwd)/ext_libraries/bin:$PATH"
fi
}
# Install with pip a bit more robustly than the default
pip_install() {
retry pip install --progress-bar off "$@"
}
# Install torch with pip, respecting PYTORCH_VERSION, and record the installed
# version into PYTORCH_VERSION, if applicable
setup_pip_pytorch_version() {
if [[ -z "$PYTORCH_VERSION" ]]; then
# Install latest prerelease version of torch, per our nightlies, consistent
# with the requested cuda version
pip_install --pre torch -f "https://download.pytorch.org/whl/test/${WHEEL_DIR}torch_test.html"
if [[ "$CUDA_VERSION" == "cpu" ]]; then
# CUDA and CPU are ABI compatible on the CPU-only parts, so strip
# in this case
export PYTORCH_VERSION="$(pip show torch | grep ^Version: | sed 's/Version: *//' | sed 's/+.\+//')"
else
export PYTORCH_VERSION="$(pip show torch | grep ^Version: | sed 's/Version: *//')"
fi
else
pip_install "torch==$PYTORCH_VERSION$PYTORCH_VERSION_SUFFIX" \
-f "https://download.pytorch.org/whl/${CU_VERSION}/torch_stable.html" \
-f "https://download.pytorch.org/whl/${UPLOAD_CHANNEL}/${CU_VERSION}/torch_${UPLOAD_CHANNEL}.html"
fi
}
# Fill PYTORCH_VERSION with the latest conda nightly version, and
# CONDA_CHANNEL_FLAGS with appropriate flags to retrieve these versions
#
# You MUST have populated PYTORCH_VERSION_SUFFIX before hand.
setup_conda_pytorch_constraint() {
if [[ -z "$PYTORCH_VERSION" ]]; then
export CONDA_CHANNEL_FLAGS="${CONDA_CHANNEL_FLAGS} -c pytorch-test -c pytorch"
PYTHON="python"
# Check if we have python 3 instead and prefer that
if python3 --version >/dev/null 2>/dev/null; then
PYTHON="python3"
fi
export PYTORCH_VERSION="$(conda search --json 'pytorch[channel=pytorch-test]' | \
${PYTHON} -c "import os, sys, json, re; cuver = os.environ.get('CU_VERSION'); \
cuver_1 = cuver.replace('cu', 'cuda') if cuver != 'cpu' else cuver; \
cuver_2 = (cuver[:-1] + '.' + cuver[-1]).replace('cu', 'cuda') if cuver != 'cpu' else cuver; \
print(re.sub(r'\\+.*$', '', \
[x['version'] for x in json.load(sys.stdin)['pytorch'] \
if (x['platform'] == 'darwin' or cuver_1 in x['fn'] or cuver_2 in x['fn']) \
and 'py' + os.environ['PYTHON_VERSION'] in x['fn']][-1]))")"
if [[ -z "$PYTORCH_VERSION" ]]; then
echo "PyTorch version auto detection failed"
echo "No package found for CU_VERSION=$CU_VERSION and PYTHON_VERSION=$PYTHON_VERSION"
exit 1
fi
else
export CONDA_CHANNEL_FLAGS="${CONDA_CHANNEL_FLAGS} -c pytorch -c pytorch-${UPLOAD_CHANNEL}"
fi
if [[ "$CU_VERSION" == cpu ]]; then
export CONDA_PYTORCH_BUILD_CONSTRAINT="- pytorch==$PYTORCH_VERSION${PYTORCH_VERSION_SUFFIX}"
export CONDA_PYTORCH_CONSTRAINT="- pytorch==$PYTORCH_VERSION"
else
export CONDA_PYTORCH_BUILD_CONSTRAINT="- pytorch==${PYTORCH_VERSION}${PYTORCH_VERSION_SUFFIX}"
export CONDA_PYTORCH_CONSTRAINT="- pytorch==${PYTORCH_VERSION}${PYTORCH_VERSION_SUFFIX}"
fi
if [[ "$OSTYPE" == msys && "$CU_VERSION" == cu92 ]]; then
export CONDA_CHANNEL_FLAGS="${CONDA_CHANNEL_FLAGS} -c defaults -c numba/label/dev"
fi
}
# Translate CUDA_VERSION into CUDA_CUDATOOLKIT_CONSTRAINT
setup_conda_cudatoolkit_constraint() {
export CONDA_BUILD_VARIANT="cuda"
if [[ "$(uname)" == Darwin ]]; then
export CONDA_BUILD_VARIANT="cpu"
else
case "$CU_VERSION" in
cu117)
export CONDA_CUDATOOLKIT_CONSTRAINT="- pytorch-cuda=11.7 # [not osx]"
;;
cu116)
export CONDA_CUDATOOLKIT_CONSTRAINT="- pytorch-cuda=11.6 # [not osx]"
;;
cpu)
export CONDA_CUDATOOLKIT_CONSTRAINT=""
export CONDA_BUILD_VARIANT="cpu"
;;
*)
echo "Unrecognized CU_VERSION=$CU_VERSION"
exit 1
;;
esac
fi
}
setup_conda_cudatoolkit_plain_constraint() {
export CONDA_BUILD_VARIANT="cuda"
export CMAKE_USE_CUDA=1
if [[ "$(uname)" == Darwin ]]; then
export CONDA_BUILD_VARIANT="cpu"
export CMAKE_USE_CUDA=0
else
case "$CU_VERSION" in
cu117)
export CONDA_CUDATOOLKIT_CONSTRAINT="pytorch-cuda=11.7"
;;
cu116)
export CONDA_CUDATOOLKIT_CONSTRAINT="pytorch-cuda=11.6"
;;
cpu)
export CONDA_CUDATOOLKIT_CONSTRAINT=""
export CONDA_BUILD_VARIANT="cpu"
export CMAKE_USE_CUDA=0
;;
*)
echo "Unrecognized CU_VERSION=$CU_VERSION"
exit 1
;;
esac
fi
}
# Build the proper compiler package before building the final package
setup_visual_studio_constraint() {
if [[ "$OSTYPE" == "msys" ]]; then
export VSTOOLCHAIN_PACKAGE=vs$VC_YEAR
conda build $CONDA_CHANNEL_FLAGS --no-anaconda-upload packaging/$VSTOOLCHAIN_PACKAGE
cp packaging/$VSTOOLCHAIN_PACKAGE/conda_build_config.yaml packaging/torchvision/conda_build_config.yaml
fi
}
setup_junit_results_folder() {
if [[ "$CI" == "true" ]]; then
export CONDA_PYTORCH_BUILD_RESULTS_DIRECTORY="${SOURCE_ROOT_DIR}/build_results/results.xml"
fi
}
download_copy_ffmpeg() {
if [[ "$OSTYPE" == "msys" ]]; then
# conda install -yq ffmpeg=4.2 -c pytorch
# curl -L -q https://anaconda.org/pytorch/ffmpeg/4.3/download/win-64/ffmpeg-4.3-ha925a31_0.tar.bz2 --output ffmpeg-4.3-ha925a31_0.tar.bz2
# bzip2 --decompress --stdout ffmpeg-4.3-ha925a31_0.tar.bz2 | tar -x --file=-
# cp Library/bin/*.dll ../torchvision
echo "FFmpeg is disabled currently on Windows"
else
if [[ "$(uname)" == Darwin ]]; then
conda install -yq ffmpeg=4.2 -c pytorch
conda install -yq wget
else
# pushd ext_libraries
# wget -q https://anaconda.org/pytorch/ffmpeg/4.2/download/linux-64/ffmpeg-4.2-hf484d3e_0.tar.bz2
# tar -xjvf ffmpeg-4.2-hf484d3e_0.tar.bz2
# rm -rf ffmpeg-4.2-hf484d3e_0.tar.bz2
# ldconfig
# which ffmpeg
# popd
echo "FFmpeg is disabled currently on Linux"
fi
fi
}