"""
==============
Datapoints FAQ
==============

Datapoints are Tensor subclasses introduced together with
``torchvision.transforms.v2``. This example showcases what these datapoints are
and how they behave.

.. warning::

    **Intended Audience** Unless you're writing your own transforms or your own datapoints, you
    probably do not need to read this guide. This is a fairly low-level topic
    that most users will not need to worry about: you do not need to understand
    the internals of datapoints to efficiently rely on
    ``torchvision.transforms.v2``. It may however be useful for advanced users
    trying to implement their own datasets or transforms, or to work directly
    with the datapoints.
"""

# %%
import PIL.Image

import torch
import torchvision

# We are using BETA APIs, so we deactivate the associated warning, thereby acknowledging that
# some APIs may slightly change in the future
torchvision.disable_beta_transforms_warning()

from torchvision import datapoints
from torchvision.transforms.v2 import functional as F


# %%
# What are datapoints?
# --------------------
#
# Datapoints are zero-copy tensor subclasses:

tensor = torch.rand(3, 256, 256)
image = datapoints.Image(tensor)

assert isinstance(image, torch.Tensor)
assert image.data_ptr() == tensor.data_ptr()

# %%
# Under the hood, they are needed in :mod:`torchvision.transforms.v2` to correctly dispatch to the appropriate function
# for the input data.
#
# What can I do with a datapoint?
# -------------------------------
#
# Datapoints look and feel just like regular tensors - they **are** tensors.
# Everything that is supported on a plain :class:`torch.Tensor` like ``.sum()`` or
# any ``torch.*`` operator will also work on datapoints. See
# :ref:`datapoint_unwrapping_behaviour` for a few gotchas.
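
# %%
# For example, plain tensor operations run on datapoints as-is, and passing a
# datapoint to a :mod:`torchvision.transforms.v2` functional dispatches to the
# kernel matching its type. A minimal sketch (which kernels are picked for each
# type is an implementation detail):

print(image.sum())

mask = datapoints.Mask(torch.randint(0, 2, (256, 256)))

resized_image = F.resize(image, size=[224, 224], antialias=True)
resized_mask = F.resize(mask, size=[224, 224])
print(type(resized_image), resized_image.shape)
print(type(resized_mask), resized_mask.shape)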

# %%
#
# What datapoints are supported?
# ------------------------------
#
# So far :mod:`torchvision.datapoints` supports four types of datapoints:
#
# * :class:`~torchvision.datapoints.Image`
# * :class:`~torchvision.datapoints.Video`
# * :class:`~torchvision.datapoints.BoundingBoxes`
# * :class:`~torchvision.datapoints.Mask`
#
# .. _datapoint_creation:
#
# How do I construct a datapoint?
# -------------------------------
#
# Using the constructor
# ^^^^^^^^^^^^^^^^^^^^^
#
# Each datapoint class takes any tensor-like data that can be turned into a :class:`~torch.Tensor`

image = datapoints.Image([[[[0, 1], [1, 0]]]])
print(image)


# %%
# Similar to other PyTorch creation ops, the constructor also takes the ``dtype``, ``device``, and ``requires_grad``
# parameters.

float_image = datapoints.Image([[[0, 1], [1, 0]]], dtype=torch.float32, requires_grad=True)
print(float_image)


# %%
# In addition, :class:`~torchvision.datapoints.Image` and :class:`~torchvision.datapoints.Mask` can also take a
# :class:`PIL.Image.Image` directly:

image = datapoints.Image(PIL.Image.open("assets/astronaut.jpg"))
print(image.shape, image.dtype)

# %%
# Some datapoints require additional metadata to be passed in order to be constructed. For example,
# :class:`~torchvision.datapoints.BoundingBoxes` requires the coordinate format as well as the size of the
# corresponding image (``canvas_size``) alongside the actual values. This
# metadata is required to properly transform the bounding boxes.

bboxes = datapoints.BoundingBoxes(
    [[17, 16, 344, 495], [0, 10, 0, 10]],
    format=datapoints.BoundingBoxFormat.XYXY,
    canvas_size=image.shape[-2:]
)
print(bboxes)

# %%
# Using the ``wrap_like()`` class method
# ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
#
# You can also use the ``wrap_like()`` class method to wrap a tensor object
# into a datapoint. This is useful when you already have an object of the
# desired type, which typically happens when writing transforms: you just want
# to wrap the output like the input. This API is inspired by utils like
# :func:`torch.zeros_like`:

new_bboxes = torch.tensor([0, 20, 30, 40])
new_bboxes = datapoints.BoundingBoxes.wrap_like(bboxes, new_bboxes)
assert isinstance(new_bboxes, datapoints.BoundingBoxes)
assert new_bboxes.canvas_size == bboxes.canvas_size


# %%
# The metadata of ``new_bboxes`` is the same as ``bboxes``, but you could pass
# it as a parameter to override it. Check the
# :meth:`~torchvision.datapoints.BoundingBoxes.wrap_like` documentation for
# more details.
#
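# For example, a minimal sketch of such an override (this assumes, as the
# documentation referenced above suggests, that ``wrap_like()`` accepts a
# ``canvas_size`` keyword argument):

other_bboxes = datapoints.BoundingBoxes.wrap_like(bboxes, new_bboxes, canvas_size=(100, 100))
print(other_bboxes.canvas_size)

# %%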
# Do I have to wrap the output of the datasets myself?
# ----------------------------------------------------
#
# TODO: Move this in another guide - this is user-facing, not dev-facing.
#
# Only if you are using custom datasets. For the built-in ones, you can use
# :func:`torchvision.datasets.wrap_dataset_for_transforms_v2`. Note that the function also supports subclasses of the
# built-in datasets. This means that if your custom dataset subclasses a built-in one and the output type is the same, you
# don't have to wrap manually either.
#
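# For the built-in case this boils down to a single call. A minimal sketch
# (only defined here, not executed, since it would need a dataset on disk):


def wrap_coco_for_v2(root, annFile):
    from torchvision import datasets

    dataset = datasets.CocoDetection(root=root, annFile=annFile)
    return datasets.wrap_dataset_for_transforms_v2(dataset)


# %%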
# If you have a custom dataset, for example the ``PennFudanDataset`` from
# `this tutorial <https://pytorch.org/tutorials/intermediate/torchvision_tutorial.html>`_, you have two options:
#
# 1. Perform the wrapping inside ``__getitem__``:

class PennFudanDataset(torch.utils.data.Dataset):
    ...

    def __getitem__(self, item):
        ...

        target["bboxes"] = datapoints.BoundingBoxes(
            bboxes,
            format=datapoints.BoundingBoxFormat.XYXY,
            canvas_size=F.get_size(img),
        )
        target["labels"] = labels
        target["masks"] = datapoints.Mask(masks)

        ...

        if self.transforms is not None:
            img, target = self.transforms(img, target)

        ...

# %%
# 2. Perform the wrapping inside a custom transformation at the beginning of your pipeline:


class WrapPennFudanDataset:
    def __call__(self, img, target):
        target["boxes"] = datapoints.BoundingBoxes(
            target["boxes"],
            format=datapoints.BoundingBoxFormat.XYXY,
            canvas_size=F.get_size(img),
        )
        target["masks"] = datapoints.Mask(target["masks"])
        return img, target


...


def get_transform(train):
    transforms = []
    transforms.append(WrapPennFudanDataset())
    transforms.append(T.PILToTensor())
    ...

# %%
# .. note::
#
#    If both :class:`~torchvision.datapoints.BoundingBoxes` and :class:`~torchvision.datapoints.Mask` objects are included in
#    the sample, ``torchvision.transforms.v2`` will transform them both. This means that if you don't need both,
#    dropping or at least not wrapping the unneeded parts can lead to a significant performance boost.
#
#    For example, if you are using the ``PennFudanDataset`` for object detection, not wrapping the masks avoids
#    transforming them over and over again in the pipeline just to ultimately ignore them. In general, it would be
#    even better not to load the masks at all, but this is not possible in this example, since the bounding boxes are
#    generated from the masks.
#
# .. _datapoint_unwrapping_behaviour:
#
# I had a Datapoint but now I have a Tensor. Help!
# ------------------------------------------------
#
# For a lot of operations involving datapoints, we cannot safely infer whether
# the result should retain the datapoint type, so we choose to return a plain
# tensor instead of a datapoint (this might change, see note below):

assert isinstance(bboxes, datapoints.BoundingBoxes)

# Shift bboxes by 3 pixels in both H and W
new_bboxes = bboxes + 3

assert isinstance(new_bboxes, torch.Tensor) and not isinstance(new_bboxes, datapoints.BoundingBoxes)

# %%
# If you're writing your own custom transforms or code involving datapoints, you
# can re-wrap the output into a datapoint by just calling their constructor, or
# by using the ``.wrap_like()`` class method:

new_bboxes = bboxes + 3
new_bboxes = datapoints.BoundingBoxes.wrap_like(bboxes, new_bboxes)
assert isinstance(new_bboxes, datapoints.BoundingBoxes)

# %%
# See more details above in :ref:`datapoint_creation`.
#
# .. note::
#
#    You never need to re-wrap manually if you're using the built-in transforms
#    or their functional equivalents: this is automatically taken care of for
#    you.
#
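# For instance, a quick sketch with one of the built-in functionals:

assert isinstance(F.horizontal_flip(bboxes), datapoints.BoundingBoxes)

# %%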
# .. note::
#
#    This "unwrapping" behaviour is something we're actively seeking feedback on. If you find this surprising or if you
#    have any suggestions on how to better support your use-cases, please reach out to us via this issue:
#    https://github.com/pytorch/vision/issues/7319
#
# There are a few exceptions to this "unwrapping" rule:
#
# 1. Operations like :meth:`~torch.Tensor.clone`, :meth:`~torch.Tensor.to`,
#    :meth:`~torch.Tensor.detach` and :meth:`~torch.Tensor.requires_grad_` retain
#    the datapoint type (a quick check is shown at the end of this example).
# 2. Inplace operations on datapoints like ``.add_()`` preserve their type. However,
#    the **returned** value of inplace operations will be unwrapped into a pure
#    tensor:

image = datapoints.Image([[[0, 1], [1, 0]]])

new_image = image.add_(1).mul_(2)

# image got transformed in-place and is still an Image datapoint, but new_image
# is a Tensor. They share the same underlying data and they're equal, just
# different classes.
assert isinstance(image, datapoints.Image)
print(image)

assert isinstance(new_image, torch.Tensor) and not isinstance(new_image, datapoints.Image)
assert (new_image == image).all()
assert new_image.data_ptr() == image.data_ptr()
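
# %%
# The first exception above can be checked in the same way (a quick sketch):
# copies and conversions such as ``.clone()`` and ``.to()`` keep the datapoint
# type.

assert isinstance(image.clone(), datapoints.Image)
assert isinstance(image.to(torch.float32), datapoints.Image)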