Commit bf491463 authored by limm

add v0.19.1 release

parent e17f5ea2
......@@ -24,7 +24,7 @@ docset: html
convert $(SPHINXPROJ).docset/icon@2x.png -resize 16x16 $(SPHINXPROJ).docset/icon.png
html-noplot: # Avoids running the gallery examples, which may take time
$(SPHINXBUILD) -D plot_gallery=0 -b html $(ASPHINXOPTS) "${SOURCEDIR}" "$(BUILDDIR)"/html
$(SPHINXBUILD) -D plot_gallery=0 -b html "${SOURCEDIR}" "$(BUILDDIR)"/html
@echo
@echo "Build finished. The HTML pages are in $(BUILDDIR)/html."
......@@ -32,6 +32,8 @@ clean:
rm -rf $(BUILDDIR)/*
rm -rf $(SOURCEDIR)/auto_examples/ # sphinx-gallery
rm -rf $(SOURCEDIR)/gen_modules/ # sphinx-gallery
rm -rf $(SOURCEDIR)/generated/ # autosummary
rm -rf $(SOURCEDIR)/models/generated # autosummary
.PHONY: help Makefile docset
......
sphinx==2.4.4
sphinx-gallery>=0.9.0
sphinx-copybutton>=0.3.1
matplotlib
numpy
-e git+git://github.com/pytorch/pytorch_sphinx_theme.git#egg=pytorch_sphinx_theme
sphinx-copybutton>=0.3.1
sphinx-gallery>=0.11.1
sphinx==5.0.0
tabulate
-e git+https://github.com/pytorch/pytorch_sphinx_theme.git#egg=pytorch_sphinx_theme
pycocotools
/* This rule (and possibly this entire file) should be removed once
/* This rule should be removed once
https://github.com/pytorch/pytorch_sphinx_theme/issues/125 is fixed.
We override the rule so that the links to the notebooks aren't hidden in the
......@@ -9,4 +9,27 @@ torchvision it just hides the links. So we have to put them back here */
article.pytorch-article .sphx-glr-download-link-note.admonition.note,
article.pytorch-article .reference.download.internal, article.pytorch-article .sphx-glr-signature {
display: block;
}
\ No newline at end of file
}
/* These 2 rules below are for the weight tables (generated in conf.py) to look
* better. In particular we make their row height shorter */
.table-weights td, .table-weights th {
margin-bottom: 0.2rem;
padding: 0 !important;
line-height: 1 !important;
}
.table-weights p {
margin-bottom: 0.2rem !important;
}
/* Fix for Sphinx gallery 0.11
See https://github.com/sphinx-gallery/sphinx-gallery/issues/990
*/
article.pytorch-article .sphx-glr-thumbnails .sphx-glr-thumbcontainer {
width: unset;
margin-right: 0;
margin-left: 0;
}
article.pytorch-article div.section div.wy-table-responsive tbody td {
width: 50%;
}
......@@ -30,4 +30,4 @@
style="fill:#9e529f"
id="path4698"
d="m 24.075479,-7.6293945e-7 c -0.5,0 -1.8,2.49999996293945 -1.8,3.59999996293945 0,1.5 1,2 1.8,2 0.8,0 1.8,-0.5 1.8,-2 -0.1,-1.1 -1.4,-3.59999996293945 -1.8,-3.59999996293945 z"
class="st1" /></svg>
\ No newline at end of file
class="st1" /></svg>
.. role:: hidden
:class: hidden-section
.. currentmodule:: {{ module }}
{{ name | underline}}
.. autoclass:: {{ name }}
:members:
.. role:: hidden
:class: hidden-section
.. currentmodule:: {{ module }}
{{ name | underline}}
.. autoclass:: {{ name }}
:members:
__getitem__,
{% if "category_name" in methods %} category_name {% endif %}
:special-members:
.. role:: hidden
:class: hidden-section
.. currentmodule:: {{ module }}
{{ name | underline}}
.. autofunction:: {{ name }}
from docutils import nodes
from docutils.parsers.rst import Directive
class BetaStatus(Directive):
has_content = True
text = "The {api_name} is in Beta stage, and backward compatibility is not guaranteed."
node = nodes.warning
def run(self):
text = self.text.format(api_name=" ".join(self.content))
return [self.node("", nodes.paragraph("", "", nodes.Text(text)))]
def setup(app):
app.add_directive("betastatus", BetaStatus)
return {
"version": "0.1",
"parallel_read_safe": True,
"parallel_write_safe": True,
}
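For reference, a minimal sketch of how the directive could then be used from an ``.rst`` page (the API name is simply the directive content):

.. code-block:: rst

    .. betastatus:: fine-grained video API

This renders as a warning admonition stating that the fine-grained video API is in Beta stage and that backward compatibility is not guaranteed.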
torchvision.datasets
====================
.. _datasets:
Datasets
========
Torchvision provides many built-in datasets in the ``torchvision.datasets``
module, as well as utility classes for building your own datasets.
Built-in datasets
-----------------
All datasets are subclasses of :class:`torch.utils.data.Dataset`
i.e., they have ``__getitem__`` and ``__len__`` methods implemented.
......@@ -19,242 +27,157 @@ All the datasets have almost similar API. They all have two common arguments:
``transform`` and ``target_transform`` to transform the input and target respectively.
You can also create your own datasets using the provided :ref:`base classes <base_classes_datasets>`.
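As a rough sketch (using ``CIFAR10`` purely for illustration, with a hypothetical ``./data`` root), a built-in dataset can be instantiated and fed to a :class:`torch.utils.data.DataLoader` like any other ``Dataset``:

.. code-block:: python

    import torch
    import torchvision
    from torchvision import transforms

    # Download (if needed) and load the training split, converting PIL images to tensors.
    dataset = torchvision.datasets.CIFAR10(
        root="./data", train=True, download=True,
        transform=transforms.ToTensor())

    # Built-in datasets plug directly into the standard DataLoader machinery.
    loader = torch.utils.data.DataLoader(dataset, batch_size=64, shuffle=True)
    images, labels = next(iter(loader))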
Caltech
~~~~~~~
.. autoclass:: Caltech101
:members: __getitem__
:special-members:
.. autoclass:: Caltech256
:members: __getitem__
:special-members:
CelebA
~~~~~~
.. autoclass:: CelebA
:members: __getitem__
:special-members:
CIFAR
~~~~~
.. autoclass:: CIFAR10
:members: __getitem__
:special-members:
.. autoclass:: CIFAR100
Cityscapes
~~~~~~~~~~
.. note ::
Requires the Cityscapes dataset to be downloaded.
.. autoclass:: Cityscapes
:members: __getitem__
:special-members:
COCO
~~~~
.. note ::
These require the `COCO API to be installed`_
.. _COCO API to be installed: https://github.com/pdollar/coco/tree/master/PythonAPI
Captions
^^^^^^^^
.. autoclass:: CocoCaptions
:members: __getitem__
:special-members:
Detection
^^^^^^^^^
.. autoclass:: CocoDetection
:members: __getitem__
:special-members:
EMNIST
~~~~~~
.. autoclass:: EMNIST
FakeData
~~~~~~~~
.. autoclass:: FakeData
Fashion-MNIST
~~~~~~~~~~~~~
.. autoclass:: FashionMNIST
Flickr
~~~~~~
.. autoclass:: Flickr8k
:members: __getitem__
:special-members:
.. autoclass:: Flickr30k
:members: __getitem__
:special-members:
HMDB51
~~~~~~~
.. autoclass:: HMDB51
:members: __getitem__
:special-members:
ImageNet
~~~~~~~~~~~
.. autoclass:: ImageNet
.. note ::
This requires `scipy` to be installed
Kinetics-400
Image classification
~~~~~~~~~~~~~~~~~~~~
.. autosummary::
:toctree: generated/
:template: class_dataset.rst
Caltech101
Caltech256
CelebA
CIFAR10
CIFAR100
Country211
DTD
EMNIST
EuroSAT
FakeData
FashionMNIST
FER2013
FGVCAircraft
Flickr8k
Flickr30k
Flowers102
Food101
GTSRB
INaturalist
ImageNet
Imagenette
KMNIST
LFWPeople
LSUN
MNIST
Omniglot
OxfordIIITPet
Places365
PCAM
QMNIST
RenderedSST2
SEMEION
SBU
StanfordCars
STL10
SUN397
SVHN
USPS
Image detection or segmentation
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
.. autosummary::
:toctree: generated/
:template: class_dataset.rst
CocoDetection
CelebA
Cityscapes
Kitti
OxfordIIITPet
SBDataset
VOCSegmentation
VOCDetection
WIDERFace
Optical Flow
~~~~~~~~~~~~
.. autoclass:: Kinetics400
:members: __getitem__
:special-members:
KITTI
~~~~~~~~~
.. autoclass:: Kitti
:members: __getitem__
:special-members:
KMNIST
~~~~~~~~~~~~~
.. autoclass:: KMNIST
LSUN
~~~~
.. autoclass:: LSUN
:members: __getitem__
:special-members:
MNIST
~~~~~
.. autoclass:: MNIST
Omniglot
~~~~~~~~
.. autoclass:: Omniglot
PhotoTour
~~~~~~~~~
.. autoclass:: PhotoTour
:members: __getitem__
:special-members:
Places365
~~~~~~~~~
.. autoclass:: Places365
:members: __getitem__
:special-members:
QMNIST
~~~~~~
.. autoclass:: QMNIST
SBD
~~~~~~
.. autoclass:: SBDataset
:members: __getitem__
:special-members:
SBU
~~~
.. autoclass:: SBU
:members: __getitem__
:special-members:
SEMEION
~~~~~~~
.. autoclass:: SEMEION
:members: __getitem__
:special-members:
STL10
~~~~~
.. autoclass:: STL10
:members: __getitem__
:special-members:
SVHN
~~~~~
.. autosummary::
:toctree: generated/
:template: class_dataset.rst
FlyingChairs
FlyingThings3D
HD1K
KittiFlow
Sintel
Stereo Matching
~~~~~~~~~~~~~~~
.. autosummary::
:toctree: generated/
:template: class_dataset.rst
CarlaStereo
Kitti2012Stereo
Kitti2015Stereo
CREStereo
FallingThingsStereo
SceneFlowStereo
SintelStereo
InStereo2k
ETH3DStereo
Middlebury2014Stereo
Image pairs
~~~~~~~~~~~
.. autoclass:: SVHN
:members: __getitem__
:special-members:
.. autosummary::
:toctree: generated/
:template: class_dataset.rst
UCF101
~~~~~~~
LFWPairs
PhotoTour
.. autoclass:: UCF101
:members: __getitem__
:special-members:
Image captioning
~~~~~~~~~~~~~~~~
USPS
~~~~~
.. autosummary::
:toctree: generated/
:template: class_dataset.rst
.. autoclass:: USPS
:members: __getitem__
:special-members:
CocoCaptions
VOC
~~~~~~
Video classification
~~~~~~~~~~~~~~~~~~~~
.. autoclass:: VOCSegmentation
:members: __getitem__
:special-members:
.. autosummary::
:toctree: generated/
:template: class_dataset.rst
.. autoclass:: VOCDetection
:members: __getitem__
:special-members:
HMDB51
Kinetics
UCF101
WIDERFace
~~~~~~~~~
Video prediction
~~~~~~~~~~~~~~~~~~~~
.. autoclass:: WIDERFace
:members: __getitem__
:special-members:
.. autosummary::
:toctree: generated/
:template: class_dataset.rst
MovingMNIST
.. _base_classes_datasets:
Base classes for custom datasets
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
--------------------------------
.. autosummary::
:toctree: generated/
:template: class.rst
DatasetFolder
ImageFolder
VisionDataset
.. autoclass:: DatasetFolder
:members: __getitem__, find_classes, make_dataset
:special-members:
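A minimal sketch of :class:`ImageFolder` on a directory laid out as ``root/<class_name>/<image>.png`` (the path below is hypothetical):

.. code-block:: python

    from torchvision import datasets, transforms

    # Every sub-directory of `root` is treated as one class.
    dataset = datasets.ImageFolder(
        root="path/to/root",  # hypothetical directory
        transform=transforms.ToTensor())

    print(dataset.classes)    # class names inferred from the folder names
    img, target = dataset[0]  # target is an index into `dataset.classes`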
Transforms v2
-------------
.. autosummary::
:toctree: generated/
:template: function.rst
.. autoclass:: ImageFolder
:members: __getitem__
:special-members:
wrap_dataset_for_transforms_v2
# Necessary for the table generated by autosummary to look decent
[html writers]
table_style: colwidths-auto
Feature extraction for model inspection
=======================================
.. currentmodule:: torchvision.models.feature_extraction
The ``torchvision.models.feature_extraction`` package contains
feature extraction utilities that let us tap into our models to access intermediate
transformations of our inputs. This could be useful for a variety of
applications in computer vision. Just a few examples are:
- Visualizing feature maps.
- Extracting features to compute image descriptors for tasks like facial
recognition, copy-detection, or image retrieval.
- Passing selected features to downstream sub-networks for end-to-end training
with a specific task in mind. For example, passing a hierarchy of features
to a Feature Pyramid Network with object detection heads.
Torchvision provides :func:`create_feature_extractor` for this purpose.
It works by following roughly these steps:
1. Symbolically tracing the model to get a graphical representation of
how it transforms the input, step by step.
2. Setting the user-selected graph nodes as outputs.
3. Removing all redundant nodes (anything downstream of the output nodes).
4. Generating python code from the resulting graph and bundling that into a
PyTorch module together with the graph itself.
|
The `torch.fx documentation <https://pytorch.org/docs/stable/fx.html>`_
provides a more general and detailed explanation of the above procedure and
the inner workings of the symbolic tracing.
.. _about-node-names:
**About Node Names**
In order to specify which nodes should be output nodes for extracted
features, one should be familiar with the node naming convention used here
(which differs slightly from that used in ``torch.fx``). A node name is
specified as a ``.`` separated path walking the module hierarchy from top level
module down to leaf operation or leaf module. For instance ``"layer4.2.relu"``
in ResNet-50 represents the output of the ReLU of the 2nd block of the 4th
layer of the ``ResNet`` module. Here are some finer points to keep in mind:
- When specifying node names for :func:`create_feature_extractor`, you may
provide a truncated version of a node name as a shortcut. To see how this
works, try creating a ResNet-50 model, print the node names with
``train_nodes, _ = get_graph_node_names(model)`` followed by
``print(train_nodes)``, and observe that the last node pertaining to ``layer4`` is
``"layer4.2.relu_2"``. One may specify ``"layer4.2.relu_2"`` as the return
node, or just ``"layer4"`` as this, by convention, refers to the last node
(in order of execution) of ``layer4``.
- If a certain module or operation is repeated more than once, node names get
an additional ``_{int}`` postfix to disambiguate. For instance, maybe the
addition (``+``) operation is used three times in the same ``forward``
method. Then there would be ``"path.to.module.add"``,
``"path.to.module.add_1"``, ``"path.to.module.add_2"``. The counter is
maintained within the scope of the direct parent. So in ResNet-50 there is
a ``"layer4.1.add"`` and a ``"layer4.2.add"``. Because the addition
operations reside in different blocks, there is no need for a postfix to
disambiguate.
**An Example**
Here is an example of how we might extract features for MaskRCNN:
.. code-block:: python
import torch
from torchvision.models import resnet50
from torchvision.models.feature_extraction import get_graph_node_names
from torchvision.models.feature_extraction import create_feature_extractor
from torchvision.models.detection.mask_rcnn import MaskRCNN
from torchvision.models.detection.backbone_utils import LastLevelMaxPool
from torchvision.ops.feature_pyramid_network import FeaturePyramidNetwork
# To assist you in designing the feature extractor you may want to print out
# the available nodes for resnet50.
m = resnet50()
train_nodes, eval_nodes = get_graph_node_names(resnet50())
# The lists returned are the names of all the graph nodes (in order of
# execution) for the input model traced in train mode and in eval mode
# respectively. You'll find that `train_nodes` and `eval_nodes` are the same
# for this example. But if the model contains control flow that's dependent
# on the training mode, they may be different.
# To specify the nodes you want to extract, you could select the final node
# that appears in each of the main layers:
return_nodes = {
# node_name: user-specified key for output dict
'layer1.2.relu_2': 'layer1',
'layer2.3.relu_2': 'layer2',
'layer3.5.relu_2': 'layer3',
'layer4.2.relu_2': 'layer4',
}
# But `create_feature_extractor` can also accept truncated node specifications
# like "layer1", as it will just pick the last node that's a descendent of
# of the specification. (Tip: be careful with this, especially when a layer
# has multiple outputs. It's not always guaranteed that the last operation
# performed is the one that corresponds to the output you desire. You should
# consult the source code for the input model to confirm.)
return_nodes = {
'layer1': 'layer1',
'layer2': 'layer2',
'layer3': 'layer3',
'layer4': 'layer4',
}
# Now you can build the feature extractor. This returns a module whose forward
# method returns a dictionary like:
# {
# 'layer1': output of layer 1,
# 'layer2': output of layer 2,
# 'layer3': output of layer 3,
# 'layer4': output of layer 4,
# }
create_feature_extractor(m, return_nodes=return_nodes)
# Let's put all that together to wrap resnet50 with MaskRCNN
# MaskRCNN requires a backbone with an attached FPN
class Resnet50WithFPN(torch.nn.Module):
def __init__(self):
super(Resnet50WithFPN, self).__init__()
# Get a resnet50 backbone
m = resnet50()
# Extract 4 main layers (note: MaskRCNN needs this particular name
# mapping for return nodes)
self.body = create_feature_extractor(
m, return_nodes={f'layer{k}': str(v)
for v, k in enumerate([1, 2, 3, 4])})
# Dry run to get number of channels for FPN
inp = torch.randn(2, 3, 224, 224)
with torch.no_grad():
out = self.body(inp)
in_channels_list = [o.shape[1] for o in out.values()]
# Build FPN
self.out_channels = 256
self.fpn = FeaturePyramidNetwork(
in_channels_list, out_channels=self.out_channels,
extra_blocks=LastLevelMaxPool())
def forward(self, x):
x = self.body(x)
x = self.fpn(x)
return x
# Now we can build our model!
model = MaskRCNN(Resnet50WithFPN(), num_classes=91).eval()
API Reference
-------------
.. autosummary::
:toctree: generated/
:template: function.rst
create_feature_extractor
get_graph_node_names
......@@ -31,18 +31,21 @@ architectures, and common image transformations for computer vision.
:maxdepth: 2
:caption: Package Reference
datasets
io
models
ops
transforms
tv_tensors
models
datasets
utils
ops
io
feature_extraction
.. toctree::
:maxdepth: 1
:caption: Examples
:caption: Examples and training references
auto_examples/index
training_references
.. automodule:: torchvision
:members:
......@@ -58,3 +61,9 @@ architectures, and common image transformations for computer vision.
TorchElastic <https://pytorch.org/elastic/>
TorchServe <https://pytorch.org/serve>
PyTorch on XLA Devices <http://pytorch.org/xla/>
Indices
-------
* :ref:`genindex`
torchvision.io
==============
Decoding / Encoding images and videos
=====================================
.. currentmodule:: torchvision.io
The :mod:`torchvision.io` package provides functions for performing IO
operations. They are currently specific to reading and writing video and
images.
operations. They are currently specific to reading and writing images and
videos.
Images
------
.. autosummary::
:toctree: generated/
:template: function.rst
read_image
decode_image
encode_jpeg
decode_jpeg
write_jpeg
decode_gif
encode_png
decode_png
write_png
read_file
write_file
.. autosummary::
:toctree: generated/
:template: class.rst
ImageReadMode
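A small sketch of decoding and re-encoding an image (the file paths are hypothetical):

.. code-block:: python

    from torchvision.io import read_image, write_png, ImageReadMode

    # Decode a JPEG or PNG file directly into a uint8 CHW tensor.
    img = read_image("path/to/image.jpg", mode=ImageReadMode.RGB)
    print(img.shape, img.dtype)  # e.g. torch.Size([3, H, W]) torch.uint8

    # Re-encode the tensor and write it back out as a PNG file.
    write_png(img, "path/to/copy.png")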
Video
-----
.. autofunction:: read_video
.. autofunction:: read_video_timestamps
.. autosummary::
:toctree: generated/
:template: function.rst
.. autofunction:: write_video
read_video
read_video_timestamps
write_video
Fine-grained video API
----------------------
^^^^^^^^^^^^^^^^^^^^^^
In addition to the :func:`read_video` function, we provide a high-performance,
lower-level API for more fine-grained control over decoding.
It does all this while fully supporting TorchScript.
.. autoclass:: VideoReader
:members: __next__, get_metadata, set_current_stream, seek
.. betastatus:: fine-grained video API
.. autosummary::
:toctree: generated/
:template: class.rst
VideoReader
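A minimal sketch of iterating over frames with :class:`VideoReader` (the file name is hypothetical):

.. code-block:: python

    import torchvision

    # Open the container and select the default video stream.
    reader = torchvision.io.VideoReader("path/to/video.mp4", "video")
    print(reader.get_metadata())

    # Seek to the 2-second mark and decode frames from there.
    reader.seek(2.0)
    for frame in reader:
        data, pts = frame["data"], frame["pts"]  # frame tensor and its timestamp in seconds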
Example of inspecting a video:
......@@ -54,29 +88,3 @@ Example of inspecting a video:
# the constructor we select a default video stream, but
# in practice, we can set whichever stream we would like
video.set_current_stream("video:0")
Image
-----
.. autoclass:: ImageReadMode
.. autofunction:: read_image
.. autofunction:: decode_image
.. autofunction:: encode_jpeg
.. autofunction:: decode_jpeg
.. autofunction:: write_jpeg
.. autofunction:: encode_png
.. autofunction:: decode_png
.. autofunction:: write_png
.. autofunction:: read_file
.. autofunction:: write_file
AlexNet
=======
.. currentmodule:: torchvision.models
The AlexNet model was originally introduced in the
`ImageNet Classification with Deep Convolutional Neural Networks
<https://papers.nips.cc/paper/2012/hash/c399862d3b9d6b76c8436e924a68c45b-Abstract.html>`__
paper. The implemented architecture is slightly different from the original one,
and is based on `One weird trick for parallelizing convolutional neural networks
<https://arxiv.org/abs/1404.5997>`__.
Model builders
--------------
The following model builders can be used to instantiate an AlexNet model, with or
without pre-trained weights. All the model builders internally rely on the
``torchvision.models.alexnet.AlexNet`` base class. Please refer to the `source
code
<https://github.com/pytorch/vision/blob/main/torchvision/models/alexnet.py>`_ for
more details about this class.
.. autosummary::
:toctree: generated/
:template: function.rst
alexnet
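A short sketch of instantiating the model with and without pre-trained weights, using the multi-weight API available in recent torchvision releases:

.. code-block:: python

    from torchvision.models import alexnet, AlexNet_Weights

    # Randomly initialized weights.
    model = alexnet(weights=None)

    # ImageNet pre-trained weights, with the matching preprocessing transforms.
    weights = AlexNet_Weights.DEFAULT
    model = alexnet(weights=weights)
    preprocess = weights.transforms()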
ConvNeXt
========
.. currentmodule:: torchvision.models
The ConvNeXt model is based on the `A ConvNet for the 2020s
<https://arxiv.org/abs/2201.03545>`_ paper.
Model builders
--------------
The following model builders can be used to instantiate a ConvNeXt model, with or
without pre-trained weights. All the model builders internally rely on the
``torchvision.models.convnext.ConvNeXt`` base class. Please refer to the `source code
<https://github.com/pytorch/vision/blob/main/torchvision/models/convnext.py>`_ for
more details about this class.
.. autosummary::
:toctree: generated/
:template: function.rst
convnext_tiny
convnext_small
convnext_base
convnext_large
DeepLabV3
=========
.. currentmodule:: torchvision.models.segmentation
The DeepLabV3 model is based on the `Rethinking Atrous Convolution for Semantic
Image Segmentation <https://arxiv.org/abs/1706.05587>`__ paper.
.. betastatus:: segmentation module
Model builders
--------------
The following model builders can be used to instantiate a DeepLabV3 model with
different backbones, with or without pre-trained weights. All the model builders
internally rely on the ``torchvision.models.segmentation.deeplabv3.DeepLabV3`` base class. Please
refer to the `source code
<https://github.com/pytorch/vision/blob/main/torchvision/models/segmentation/deeplabv3.py>`_
for more details about this class.
.. autosummary::
:toctree: generated/
:template: function.rst
deeplabv3_mobilenet_v3_large
deeplabv3_resnet50
deeplabv3_resnet101
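As with the classification models, a short builder sketch (the weights enum name follows the convention used in recent torchvision releases):

.. code-block:: python

    from torchvision.models.segmentation import (
        deeplabv3_resnet50, DeepLabV3_ResNet50_Weights)

    # Weights trained on a subset of COCO, using the Pascal VOC label set.
    weights = DeepLabV3_ResNet50_Weights.DEFAULT
    model = deeplabv3_resnet50(weights=weights).eval()
    preprocess = weights.transforms()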
DenseNet
========
.. currentmodule:: torchvision.models
The DenseNet model is based on the `Densely Connected Convolutional Networks
<https://arxiv.org/abs/1608.06993>`_ paper.
Model builders
--------------
The following model builders can be used to instantiate a DenseNet model, with or
without pre-trained weights. All the model builders internally rely on the
``torchvision.models.densenet.DenseNet`` base class. Please refer to the `source
code
<https://github.com/pytorch/vision/blob/main/torchvision/models/densenet.py>`_ for
more details about this class.
.. autosummary::
:toctree: generated/
:template: function.rst
densenet121
densenet161
densenet169
densenet201
EfficientNet
============
.. currentmodule:: torchvision.models
The EfficientNet model is based on the `EfficientNet: Rethinking Model Scaling for Convolutional Neural Networks <https://arxiv.org/abs/1905.11946>`__
paper.
Model builders
--------------
The following model builders can be used to instantiate an EfficientNet model, with or
without pre-trained weights. All the model builders internally rely on the
``torchvision.models.efficientnet.EfficientNet`` base class. Please refer to the `source
code
<https://github.com/pytorch/vision/blob/main/torchvision/models/efficientnet.py>`_ for
more details about this class.
.. autosummary::
:toctree: generated/
:template: function.rst
efficientnet_b0
efficientnet_b1
efficientnet_b2
efficientnet_b3
efficientnet_b4
efficientnet_b5
efficientnet_b6
efficientnet_b7