Unverified Commit eb00e2ad authored by Nicolas Hug, committed by GitHub

Fix sphinx warnings and turn warnings into errors (#3290)


Co-authored-by: Vasilis Vryniotis <datumbox@users.noreply.github.com>
parent 59d3af53
@@ -2,7 +2,7 @@
 #
 # You can set these variables from the command line.
-SPHINXOPTS    =
+SPHINXOPTS    = -W # turn warnings into errors
 SPHINXBUILD   = sphinx-build
 SPHINXPROJ    = torchvision
 SOURCEDIR     = source
......
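With ``-W``, the docs build now fails on the first Sphinx warning instead of merely logging it. For reference, a minimal sketch of the equivalent invocation through Sphinx's Python entry point, assuming the ``source``/``build/html`` layout implied by the Makefile above (illustrative, not part of this commit)::

    from sphinx.cmd.build import build_main

    # "-W" promotes every warning to an error, so the build exits non-zero
    # on the first warning -- the same effect as SPHINXOPTS = -W above.
    exit_code = build_main(["-W", "-b", "html", "source", "build/html"])
    raise SystemExit(exit_code)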
@@ -23,8 +23,23 @@
 import torch
 import torchvision
 import pytorch_sphinx_theme
+from sphinxcontrib import googleanalytics
+
+# Wrap sphinxcontrib-googleanalytics setup() function to avoid a Sphinx warning:
+# "WARNING: extension ‘sphinxcontrib.googleanalytics’ returned an unsupported
+# object from its setup() function; it should return None or a metadata
+# dictionary"
+_googleanalytics_setup_original = googleanalytics.setup
+
+
+def _googleanalytics_setup_wrapper(app):
+    _googleanalytics_setup_original(app)
+    return {"version": "0.1"}
+
+
+googleanalytics.setup = _googleanalytics_setup_wrapper
 
 # -- General configuration ------------------------------------------------
 
 # If your documentation needs a minimal Sphinx version, state it here.
@@ -48,6 +63,8 @@ extensions = [
 ]
 
 napoleon_use_ivar = True
 napoleon_numpy_docstring = False
 napoleon_google_docstring = True
+googleanalytics_id = 'UA-90545585-1'
+googleanalytics_enabled = True
......
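For context, Sphinx expects an extension's ``setup()`` to return either ``None`` or a metadata dictionary; anything else triggers the warning quoted above, which the wrapper silences. A hypothetical well-behaved extension would look like this (illustrative sketch, not part of this commit)::

    def setup(app):
        # register a config value the extension reads at build time
        app.add_config_value("googleanalytics_id", "", "html")
        # the metadata dict Sphinx expects: a version plus parallelism flags
        return {
            "version": "0.1",
            "parallel_read_safe": True,
            "parallel_write_safe": True,
        }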
@@ -155,7 +155,7 @@ MNIST
 .. autoclass:: MNIST
 
 Omniglot
-~~~~~~
+~~~~~~~~
 
 .. autoclass:: Omniglot
......
@@ -18,7 +18,7 @@ Video
 
 Fine-grained video API
-----------------------
+----------------------
 
 In addition to the :mod:`read_video` function, we provide a high-performance
 lower-level API for more fine-grained control compared to the :mod:`read_video` function.
......
@@ -17,12 +17,15 @@ class CelebA(VisionDataset):
         target_type (string or list, optional): Type of target to use, ``attr``, ``identity``, ``bbox``,
             or ``landmarks``. Can also be a list to output a tuple with all specified target types.
             The targets represent:
-            ``attr`` (np.array shape=(40,) dtype=int): binary (0, 1) labels for attributes
-            ``identity`` (int): label for each person (data points with the same identity are the same person)
-            ``bbox`` (np.array shape=(4,) dtype=int): bounding box (x, y, width, height)
-            ``landmarks`` (np.array shape=(10,) dtype=int): landmark points (lefteye_x, lefteye_y, righteye_x,
-            righteye_y, nose_x, nose_y, leftmouth_x, leftmouth_y, rightmouth_x, rightmouth_y)
+
+                - ``attr`` (np.array shape=(40,) dtype=int): binary (0, 1) labels for attributes
+                - ``identity`` (int): label for each person (data points with the same identity are the same person)
+                - ``bbox`` (np.array shape=(4,) dtype=int): bounding box (x, y, width, height)
+                - ``landmarks`` (np.array shape=(10,) dtype=int): landmark points (lefteye_x, lefteye_y, righteye_x,
+                  righteye_y, nose_x, nose_y, leftmouth_x, leftmouth_y, rightmouth_x, rightmouth_y)
+
             Defaults to ``attr``. If empty, ``None`` will be returned as target.
         transform (callable, optional): A function/transform that takes in an PIL image
             and returns a transformed version. E.g, ``transforms.ToTensor``
         target_transform (callable, optional): A function/transform that takes in the
......
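A short usage sketch of the multi-target behaviour documented above (the ``root`` path is hypothetical)::

    from torchvision import datasets

    # with a list of target types, each sample's target is a tuple
    celeba = datasets.CelebA(root="data", split="train",
                             target_type=["attr", "bbox"], download=True)
    img, (attr, bbox) = celeba[0]  # attr: (40,) binary labels; bbox: (x, y, w, h)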
@@ -37,10 +37,12 @@ class HMDB51(VisionDataset):
             and returns a transformed version.
 
     Returns:
-        video (Tensor[T, H, W, C]): the `T` video frames
-        audio(Tensor[K, L]): the audio frames, where `K` is the number of channels
-        and `L` is the number of points
-        label (int): class of the video clip
+        tuple: A 3-tuple with the following entries:
+
+            - video (Tensor[T, H, W, C]): The `T` video frames
+            - audio(Tensor[K, L]): the audio frames, where `K` is the number of channels
+              and `L` is the number of points
+            - label (int): class of the video clip
     """
 
     data_url = "http://serre-lab.clps.brown.edu/wp-content/uploads/2013/10/hmdb51_org.rar"
......
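A sketch of consuming that 3-tuple (the dataset and annotation paths are hypothetical, with the archives already extracted under them)::

    from torchvision import datasets

    dataset = datasets.HMDB51("data/hmdb51", "data/test_train_splits",
                              frames_per_clip=16)
    video, audio, label = dataset[0]
    print(video.shape)  # (T, H, W, C) with T == frames_per_clip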
@@ -30,10 +30,12 @@ class Kinetics400(VisionDataset):
             and returns a transformed version.
 
     Returns:
-        video (Tensor[T, H, W, C]): the `T` video frames
-        audio(Tensor[K, L]): the audio frames, where `K` is the number of channels
-        and `L` is the number of points
-        label (int): class of the video clip
+        tuple: A 3-tuple with the following entries:
+
+            - video (Tensor[T, H, W, C]): the `T` video frames
+            - audio(Tensor[K, L]): the audio frames, where `K` is the number of channels
+              and `L` is the number of points
+            - label (int): class of the video clip
     """
 
     def __init__(self, root, frames_per_clip, step_between_clips=1, frame_rate=None,
......
@@ -318,7 +318,7 @@ class QMNIST(MNIST):
     """`QMNIST <https://github.com/facebookresearch/qmnist>`_ Dataset.
 
     Args:
-        root (string): Root directory of dataset whose ``processed''
+        root (string): Root directory of dataset whose ``processed``
             subdir contains torch binary files with the datasets.
         what (string,optional): Can be 'train', 'test', 'test10k',
             'test50k', or 'nist' for respectively the mnist compatible
@@ -342,7 +342,6 @@ class QMNIST(MNIST):
         train (bool,optional,compatibility): When argument 'what' is
             not specified, this boolean decides whether to load the
            training set ot the testing set. Default: True.
-
     """
 
     subsets = {
......
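A sketch of selecting one of the extra QMNIST splits via ``what`` (the ``root`` path is hypothetical)::

    from torchvision import datasets

    # 'test50k' selects the extended test samples beyond the classic
    # 10k MNIST-compatible test set
    qmnist = datasets.QMNIST("data", what="test50k", download=True)
    img, target = qmnist[0]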
@@ -7,6 +7,7 @@ from .utils import download_and_extract_archive, check_integrity, list_dir, list
 class Omniglot(VisionDataset):
     """`Omniglot <https://github.com/brendenlake/omniglot>`_ Dataset.
+
     Args:
         root (string): Root directory of dataset where directory
             ``omniglot-py`` exists.
......
@@ -18,7 +18,7 @@ class STL10(VisionDataset):
             Accordingly dataset is selected.
         folds (int, optional): One of {0-9} or None.
             For training, loads one of the 10 pre-defined folds of 1k samples for the
-            standard evaluation procedure. If no value is passed, loads the 5k samples.
+            standard evaluation procedure. If no value is passed, loads the 5k samples.
         transform (callable, optional): A function/transform that takes in an PIL image
             and returns a transformed version. E.g, ``transforms.RandomCrop``
         target_transform (callable, optional): A function/transform that takes in the
@@ -26,7 +26,6 @@ class STL10(VisionDataset):
         download (bool, optional): If true, downloads the dataset from the internet and
             puts it in root directory. If dataset is already downloaded, it is not
             downloaded again.
-
     """
 
     base_folder = 'stl10_binary'
     url = "http://ai.stanford.edu/~acoates/stl10/stl10_binary.tar.gz"
......
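A sketch of the ``folds`` behaviour described above (hypothetical ``root``)::

    from torchvision import datasets

    # folds=0 loads the first of the ten pre-defined 1k-sample folds;
    # folds=None (the default) loads all 5k labelled training samples
    stl10 = datasets.STL10("data", split="train", folds=0, download=True)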
@@ -35,10 +35,12 @@ class UCF101(VisionDataset):
             and returns a transformed version.
 
     Returns:
-        video (Tensor[T, H, W, C]): the `T` video frames
-        audio(Tensor[K, L]): the audio frames, where `K` is the number of channels
-        and `L` is the number of points
-        label (int): class of the video clip
+        tuple: A 3-tuple with the following entries:
+
+            - video (Tensor[T, H, W, C]): the `T` video frames
+            - audio(Tensor[K, L]): the audio frames, where `K` is the number of channels
+              and `L` is the number of points
+            - label (int): class of the video clip
     """
 
     def __init__(self, root, annotation_path, frames_per_clip, step_between_clips=1,
......
@@ -51,28 +51,33 @@ class VideoReader:
     Example:
         The following examples creates a :mod:`VideoReader` object, seeks into 2s
         point, and returns a single frame::
-        import torchvision
-        video_path = "path_to_a_test_video"
-        reader = torchvision.io.VideoReader(video_path, "video")
-        reader.seek(2.0)
-        frame = next(reader)
+
+            import torchvision
+            video_path = "path_to_a_test_video"
+            reader = torchvision.io.VideoReader(video_path, "video")
+            reader.seek(2.0)
+            frame = next(reader)
 
         :mod:`VideoReader` implements the iterable API, which makes it suitable to
         using it in conjunction with :mod:`itertools` for more advanced reading.
         As such, we can use a :mod:`VideoReader` instance inside for loops::
+
             reader.seek(2)
             for frame in reader:
                 frames.append(frame['data'])
+
             # additionally, `seek` implements a fluent API, so we can do
             for frame in reader.seek(2):
                 frames.append(frame['data'])
 
         With :mod:`itertools`, we can read all frames between 2 and 5 seconds with the
         following code::
+
            for frame in itertools.takewhile(lambda x: x['pts'] <= 5, reader.seek(2)):
                frames.append(frame['data'])
 
         and similarly, reading 10 frames after the 2s timestamp can be achieved
         as follows::
+
            for frame in itertools.islice(reader.seek(2), 10):
                frames.append(frame['data'])
......
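For completeness, a self-contained version of the :mod:`itertools` snippet above, with the imports it needs (the video path is hypothetical)::

    import itertools
    import torchvision

    reader = torchvision.io.VideoReader("path_to_a_test_video", "video")
    # collect the decoded frames whose presentation timestamps fall in [2, 5]
    frames = [frame["data"] for frame in
              itertools.takewhile(lambda x: x["pts"] <= 5, reader.seek(2))]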
@@ -126,8 +126,8 @@ def encode_png(input: torch.Tensor, compression_level: int = 6) -> torch.Tensor:
             between 0 and 9. Default: 6
 
     Returns:
-        output (Tensor[1]): A one dimensional int8 tensor that contains the raw bytes of the
-        PNG file.
+        Tensor[1]: A one dimensional int8 tensor that contains the raw bytes of the
+        PNG file.
     """
     output = torch.ops.image.encode_png(input, compression_level)
     return output
......
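A sketch of writing those raw bytes to disk, assuming the CHW ``uint8`` input that ``encode_png`` expects (the output filename is arbitrary)::

    import torch
    from torchvision.io.image import encode_png

    img = torch.randint(0, 256, (3, 64, 64), dtype=torch.uint8)  # CHW, uint8
    png_bytes = encode_png(img, compression_level=6)  # Tensor[1] of raw bytes
    png_bytes.numpy().tofile("out.png")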
@@ -253,10 +253,8 @@ def read_video(
 
     Returns:
         vframes (Tensor[T, H, W, C]): the `T` video frames
-        aframes (Tensor[K, L]): the audio frames, where `K` is the number of channels and `L` is the
-        number of points
-        info (Dict): metadata for the video and audio. Can contain the fields video_fps (float)
-        and audio_fps (int)
+        aframes (Tensor[K, L]): the audio frames, where `K` is the number of channels and `L` is the number of points
+        info (Dict): metadata for the video and audio. Can contain the fields video_fps (float) and audio_fps (int)
     """
 
     from torchvision import get_video_backend
......
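A sketch of the call and its three return values (the clip path is hypothetical)::

    from torchvision.io import read_video

    vframes, aframes, info = read_video("clip.mp4", start_pts=0, end_pts=2,
                                        pts_unit="sec")
    print(vframes.shape)          # (T, H, W, C)
    print(info.get("video_fps"))  # e.g. 29.97, when the container reports it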
@@ -308,6 +308,7 @@ def fasterrcnn_resnet50_fpn(pretrained=False, progress=True,
     During training, the model expects both the input tensors, as well as a targets (list of dictionary),
     containing:
+
         - boxes (``FloatTensor[N, 4]``): the ground-truth boxes in ``[x1, y1, x2, y2]`` format, with values of ``x``
           between ``0`` and ``W`` and values of ``y`` between ``0`` and ``H``
         - labels (``Int64Tensor[N]``): the class label for each ground-truth box
@@ -318,6 +319,7 @@ def fasterrcnn_resnet50_fpn(pretrained=False, progress=True,
     During inference, the model requires only the input tensors, and returns the post-processed
     predictions as a ``List[Dict[Tensor]]``, one for each input image. The fields of the ``Dict`` are as
     follows:
+
         - boxes (``FloatTensor[N, 4]``): the predicted boxes in ``[x1, y1, x2, y2]`` format, with values of ``x``
           between ``0`` and ``W`` and values of ``y`` between ``0`` and ``H``
         - labels (``Int64Tensor[N]``): the predicted labels for each image
......
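A minimal sketch of both contracts described above, on random tensors (illustrative only; real training needs an optimizer loop)::

    import torch
    import torchvision

    model = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained=False)
    images = [torch.rand(3, 300, 400)]
    targets = [{"boxes": torch.tensor([[10., 20., 110., 220.]]),  # [x1, y1, x2, y2]
                "labels": torch.tensor([1], dtype=torch.int64)}]
    losses = model(images, targets)  # training mode: a dict of losses

    model.eval()
    predictions = model(images)      # inference mode: one Dict[str, Tensor] per image
    print(predictions[0]["boxes"].shape, predictions[0]["labels"].shape)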
@@ -26,6 +26,7 @@ class KeypointRCNN(FasterRCNN):
     During training, the model expects both the input tensors, as well as a targets (list of dictionary),
     containing:
+
         - boxes (FloatTensor[N, 4]): the ground-truth boxes in [x1, y1, x2, y2] format, with values of x
           between 0 and W and values of y between 0 and H
         - labels (Int64Tensor[N]): the class label for each ground-truth box
@@ -38,6 +39,7 @@ class KeypointRCNN(FasterRCNN):
     During inference, the model requires only the input tensors, and returns the post-processed
     predictions as a List[Dict[Tensor]], one for each input image. The fields of the Dict are as
     follows:
+
         - boxes (FloatTensor[N, 4]): the predicted boxes in [x1, y1, x2, y2] format, with values of x
           between 0 and W and values of y between 0 and H
         - labels (Int64Tensor[N]): the predicted labels for each image
@@ -283,6 +285,7 @@ def keypointrcnn_resnet50_fpn(pretrained=False, progress=True,
     During training, the model expects both the input tensors, as well as a targets (list of dictionary),
     containing:
+
         - boxes (``FloatTensor[N, 4]``): the ground-truth boxes in ``[x1, y1, x2, y2]`` format, with values of ``x``
           between ``0`` and ``W`` and values of ``y`` between ``0`` and ``H``
         - labels (``Int64Tensor[N]``): the class label for each ground-truth box
@@ -295,6 +298,7 @@ def keypointrcnn_resnet50_fpn(pretrained=False, progress=True,
     During inference, the model requires only the input tensors, and returns the post-processed
     predictions as a ``List[Dict[Tensor]]``, one for each input image. The fields of the ``Dict`` are as
     follows:
+
         - boxes (``FloatTensor[N, 4]``): the predicted boxes in ``[x1, y1, x2, y2]`` format, with values of ``x``
           between ``0`` and ``W`` and values of ``y`` between ``0`` and ``H``
         - labels (``Int64Tensor[N]``): the predicted labels for each image
......
@@ -278,6 +278,7 @@ def maskrcnn_resnet50_fpn(pretrained=False, progress=True,
     During training, the model expects both the input tensors, as well as a targets (list of dictionary),
     containing:
+
         - boxes (``FloatTensor[N, 4]``): the ground-truth boxes in ``[x1, y1, x2, y2]`` format, with values of ``x``
           between ``0`` and ``W`` and values of ``y`` between ``0`` and ``H``
         - labels (``Int64Tensor[N]``): the class label for each ground-truth box
@@ -289,6 +290,7 @@ def maskrcnn_resnet50_fpn(pretrained=False, progress=True,
     During inference, the model requires only the input tensors, and returns the post-processed
     predictions as a ``List[Dict[Tensor]]``, one for each input image. The fields of the ``Dict`` are as
     follows:
+
         - boxes (``FloatTensor[N, 4]``): the predicted boxes in ``[x1, y1, x2, y2]`` format, with values of ``x``
           between ``0`` and ``W`` and values of ``y`` between ``0`` and ``H``
         - labels (``Int64Tensor[N]``): the predicted labels for each image
......
@@ -575,6 +575,7 @@ def retinanet_resnet50_fpn(pretrained=False, progress=True,
     During training, the model expects both the input tensors, as well as a targets (list of dictionary),
     containing:
+
         - boxes (``FloatTensor[N, 4]``): the ground-truth boxes in ``[x1, y1, x2, y2]`` format, with values
           between ``0`` and ``H`` and ``0`` and ``W``
         - labels (``Int64Tensor[N]``): the class label for each ground-truth box
@@ -585,6 +586,7 @@ def retinanet_resnet50_fpn(pretrained=False, progress=True,
     During inference, the model requires only the input tensors, and returns the post-processed
     predictions as a ``List[Dict[Tensor]]``, one for each input image. The fields of the ``Dict`` are as
     follows:
+
         - boxes (``FloatTensor[N, 4]``): the predicted boxes in ``[x1, y1, x2, y2]`` format, with values between
           ``0`` and ``H`` and ``0`` and ``W``
         - labels (``Int64Tensor[N]``): the predicted labels for each image
......
@@ -982,9 +982,9 @@ def affine(
             of length 1: ``[value, ]``.
             If input is PIL Image, the options is only available for ``Pillow>=5.0.0``.
         fillcolor (sequence, int, float): deprecated argument and will be removed since v0.10.0.
-            Please use `arg`:fill: instead.
+            Please use the ``fill`` parameter instead.
         resample (int, optional): deprecated argument and will be removed since v0.10.0.
-            Please use `arg`:interpolation: instead.
+            Please use the ``interpolation`` parameter instead.
 
     Returns:
         PIL Image or Tensor: Transformed image.
@@ -1179,7 +1179,7 @@ class RandomRotation(torch.nn.Module):
             image. If given a number, the value is used for all bands respectively.
             If input is PIL Image, the options is only available for ``Pillow>=5.2.0``.
         resample (int, optional): deprecated argument and will be removed since v0.10.0.
-            Please use `arg`:interpolation: instead.
+            Please use the ``interpolation`` parameter instead.
 
     .. _filters: https://pillow.readthedocs.io/en/latest/handbook/concepts.html#filters
@@ -1284,9 +1284,9 @@ class RandomAffine(torch.nn.Module):
             image. If given a number, the value is used for all bands respectively.
             If input is PIL Image, the options is only available for ``Pillow>=5.0.0``.
         fillcolor (sequence or number, optional): deprecated argument and will be removed since v0.10.0.
-            Please use `arg`:fill: instead.
+            Please use the ``fill`` parameter instead.
         resample (int, optional): deprecated argument and will be removed since v0.10.0.
-            Please use `arg`:interpolation: instead.
+            Please use the ``interpolation`` parameter instead.
 
     .. _filters: https://pillow.readthedocs.io/en/latest/handbook/concepts.html#filters
......