Unverified commit eb00e2ad, authored by Nicolas Hug and committed by GitHub

Fix sphinx warnings and turn warnings into errors (#3290)


Co-authored-by: Vasilis Vryniotis <datumbox@users.noreply.github.com>
parent 59d3af53
@@ -2,7 +2,7 @@
#
# You can set these variables from the command line.
-SPHINXOPTS =
+SPHINXOPTS = -W # turn warnings into errors
SPHINXBUILD = sphinx-build
SPHINXPROJ = torchvision
SOURCEDIR = source
......
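For context (not part of the diff): with ``-W``, ``sphinx-build`` exits non-zero as soon as any warning is emitted, so CI can fail the docs build. A minimal sketch of checking this from Python, assuming the ``source`` and ``build/html`` paths implied by the Makefile above::

    import subprocess

    # With -W, a single Sphinx warning is enough to get a non-zero exit code.
    result = subprocess.run(["sphinx-build", "-W", "source", "build/html"])
    if result.returncode != 0:
        raise SystemExit("docs build failed: warnings are treated as errors")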
@@ -23,8 +23,23 @@
import torch
import torchvision
import pytorch_sphinx_theme
+from sphinxcontrib import googleanalytics
+
+# Wrap sphinxcontrib-googleanalytics setup() function to avoid a Sphinx warning:
+# "WARNING: extension 'sphinxcontrib.googleanalytics' returned an unsupported
+# object from its setup() function; it should return None or a metadata
+# dictionary"
+_googleanalytics_setup_original = googleanalytics.setup
+
+
+def _googleanalytics_setup_wrapper(app):
+    _googleanalytics_setup_original(app)
+    return {"version": "0.1"}
+
+
+googleanalytics.setup = _googleanalytics_setup_wrapper
# -- General configuration ------------------------------------------------
# If your documentation needs a minimal Sphinx version, state it here.
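The wrapper above is a general monkey-patching pattern: Sphinx warns when an extension's ``setup()`` returns anything other than ``None`` or a metadata dict, so the patched ``setup()`` forwards the call and returns a dict. A generic sketch of the same idea (``patch_extension_setup`` is a hypothetical helper, not in the commit)::

    import functools

    def patch_extension_setup(module, version="0.1"):
        # Replace module.setup with a wrapper that returns the metadata
        # dictionary Sphinx expects, silencing the warning.
        original = module.setup

        @functools.wraps(original)
        def wrapper(app):
            original(app)
            return {"version": version}

        module.setup = wrapper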
@@ -48,6 +63,8 @@ extensions = [
]
napoleon_use_ivar = True
+napoleon_numpy_docstring = False
+napoleon_google_docstring = True
googleanalytics_id = 'UA-90545585-1'
googleanalytics_enabled = True
......
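With ``napoleon_google_docstring = True`` and ``napoleon_numpy_docstring = False``, napoleon parses only Google-style sections. A minimal example of the docstring format this configuration expects (illustrative function, not from the codebase)::

    def area(width, height):
        """Compute a rectangle's area.

        Args:
            width (int): Width in pixels.
            height (int): Height in pixels.

        Returns:
            int: The area, ``width * height``.
        """
        return width * height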
@@ -155,7 +155,7 @@ MNIST
.. autoclass:: MNIST
Omniglot
-~~~~~~
+~~~~~~~~
.. autoclass:: Omniglot
......
@@ -18,7 +18,7 @@ Video
Fine-grained video API
--------------------
+----------------------
In addition to the :mod:`read_video` function, we provide a high-performance
lower-level API for more fine-grained control compared to the :mod:`read_video` function.
......
@@ -17,12 +17,15 @@ class CelebA(VisionDataset):
target_type (string or list, optional): Type of target to use, ``attr``, ``identity``, ``bbox``,
or ``landmarks``. Can also be a list to output a tuple with all specified target types.
The targets represent:
-``attr`` (np.array shape=(40,) dtype=int): binary (0, 1) labels for attributes
-``identity`` (int): label for each person (data points with the same identity are the same person)
-``bbox`` (np.array shape=(4,) dtype=int): bounding box (x, y, width, height)
-``landmarks`` (np.array shape=(10,) dtype=int): landmark points (lefteye_x, lefteye_y, righteye_x,
+- ``attr`` (np.array shape=(40,) dtype=int): binary (0, 1) labels for attributes
+- ``identity`` (int): label for each person (data points with the same identity are the same person)
+- ``bbox`` (np.array shape=(4,) dtype=int): bounding box (x, y, width, height)
+- ``landmarks`` (np.array shape=(10,) dtype=int): landmark points (lefteye_x, lefteye_y, righteye_x,
+  righteye_y, nose_x, nose_y, leftmouth_x, leftmouth_y, rightmouth_x, rightmouth_y)
Defaults to ``attr``. If empty, ``None`` will be returned as target.
transform (callable, optional): A function/transform that takes in a PIL image
and returns a transformed version. E.g., ``transforms.ToTensor``
target_transform (callable, optional): A function/transform that takes in the
......
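As described above, passing a list for ``target_type`` makes ``__getitem__`` return a tuple of targets in the same order; a short sketch (the ``root`` path and ``download=True`` are placeholders)::

    from torchvision import datasets

    celeba = datasets.CelebA(
        root="data",
        split="train",
        target_type=["attr", "identity"],   # -> target is an (attr, identity) tuple
        download=True,
    )
    img, (attr, identity) = celeba[0]
    print(attr.shape)                       # 40 binary attribute labels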
@@ -37,10 +37,12 @@ class HMDB51(VisionDataset):
and returns a transformed version.
Returns:
-video (Tensor[T, H, W, C]): the `T` video frames
-audio(Tensor[K, L]): the audio frames, where `K` is the number of channels
+tuple: A 3-tuple with the following entries:
+
+    - video (Tensor[T, H, W, C]): The `T` video frames
+    - audio(Tensor[K, L]): the audio frames, where `K` is the number of channels
+      and `L` is the number of points
-label (int): class of the video clip
+    - label (int): class of the video clip
"""
data_url = "http://serre-lab.clps.brown.edu/wp-content/uploads/2013/10/hmdb51_org.rar"
......
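The revised ``Returns`` section above documents ``__getitem__``'s 3-tuple; the same contract applies to ``Kinetics400`` and ``UCF101`` below. A sketch, with placeholder paths::

    from torchvision.datasets import HMDB51

    hmdb = HMDB51("data/hmdb51", "data/splits", frames_per_clip=16)
    video, audio, label = hmdb[0]   # the documented (video, audio, label) tuple
    print(video.shape)              # [T, H, W, C] with T == frames_per_clip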
@@ -30,10 +30,12 @@ class Kinetics400(VisionDataset):
and returns a transformed version.
Returns:
-video (Tensor[T, H, W, C]): the `T` video frames
-audio(Tensor[K, L]): the audio frames, where `K` is the number of channels
+tuple: A 3-tuple with the following entries:
+
+    - video (Tensor[T, H, W, C]): the `T` video frames
+    - audio(Tensor[K, L]): the audio frames, where `K` is the number of channels
+      and `L` is the number of points
-label (int): class of the video clip
+    - label (int): class of the video clip
"""
def __init__(self, root, frames_per_clip, step_between_clips=1, frame_rate=None,
......
@@ -318,7 +318,7 @@ class QMNIST(MNIST):
"""`QMNIST <https://github.com/facebookresearch/qmnist>`_ Dataset.
Args:
-root (string): Root directory of dataset whose ``processed''
+root (string): Root directory of dataset whose ``processed``
subdir contains torch binary files with the datasets.
what (string,optional): Can be 'train', 'test', 'test10k',
'test50k', or 'nist' for respectively the mnist compatible
@@ -342,7 +342,6 @@ class QMNIST(MNIST):
train (bool,optional,compatibility): When argument 'what' is
not specified, this boolean decides whether to load the
training set or the testing set. Default: True.
"""
subsets = {
......
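For reference, the ``what`` argument documented above selects the partition directly; a sketch with a placeholder root::

    from torchvision.datasets import QMNIST

    test50k = QMNIST("data", what="test50k", download=True)
    img, target = test50k[0]    # MNIST-compatible (image, class label) pair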
@@ -7,6 +7,7 @@ from .utils import download_and_extract_archive, check_integrity, list_dir, list_files
class Omniglot(VisionDataset):
"""`Omniglot <https://github.com/brendenlake/omniglot>`_ Dataset.
Args:
root (string): Root directory of dataset where directory
``omniglot-py`` exists.
......
@@ -26,7 +26,6 @@ class STL10(VisionDataset):
download (bool, optional): If true, downloads the dataset from the internet and
puts it in root directory. If dataset is already downloaded, it is not
downloaded again.
"""
base_folder = 'stl10_binary'
url = "http://ai.stanford.edu/~acoates/stl10/stl10_binary.tar.gz"
......
@@ -35,10 +35,12 @@ class UCF101(VisionDataset):
and returns a transformed version.
Returns:
-video (Tensor[T, H, W, C]): the `T` video frames
-audio(Tensor[K, L]): the audio frames, where `K` is the number of channels
+tuple: A 3-tuple with the following entries:
+
+    - video (Tensor[T, H, W, C]): the `T` video frames
+    - audio(Tensor[K, L]): the audio frames, where `K` is the number of channels
+      and `L` is the number of points
-label (int): class of the video clip
+    - label (int): class of the video clip
"""
def __init__(self, root, annotation_path, frames_per_clip, step_between_clips=1,
......
@@ -51,9 +51,9 @@ class VideoReader:
Example:
The following example creates a :mod:`VideoReader` object, seeks to the 2s
point, and returns a single frame::
    import torchvision
    video_path = "path_to_a_test_video"
    reader = torchvision.io.VideoReader(video_path, "video")
    reader.seek(2.0)
    frame = next(reader)
@@ -61,18 +61,23 @@ class VideoReader:
:mod:`VideoReader` implements the iterable API, which makes it suitable
for use in conjunction with :mod:`itertools` for more advanced reading.
As such, we can use a :mod:`VideoReader` instance inside for loops::
    reader.seek(2)
    for frame in reader:
        frames.append(frame['data'])

    # additionally, `seek` implements a fluent API, so we can do
    for frame in reader.seek(2):
        frames.append(frame['data'])
With :mod:`itertools`, we can read all frames between 2 and 5 seconds with the
following code::
    for frame in itertools.takewhile(lambda x: x['pts'] <= 5, reader.seek(2)):
        frames.append(frame['data'])
and similarly, reading 10 frames after the 2s timestamp can be achieved
as follows::
    for frame in itertools.islice(reader.seek(2), 10):
        frames.append(frame['data'])
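Putting the pieces together, a self-contained version of the snippets above (the video path is a placeholder)::

    import itertools
    import torchvision

    reader = torchvision.io.VideoReader("path_to_a_test_video", "video")
    frames = []
    # frames between the 2s and 5s timestamps, via the fluent seek API
    for frame in itertools.takewhile(lambda x: x['pts'] <= 5, reader.seek(2)):
        frames.append(frame['data'])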
......
@@ -126,7 +126,7 @@ def encode_png(input: torch.Tensor, compression_level: int = 6) -> torch.Tensor:
between 0 and 9. Default: 6
Returns:
-output (Tensor[1]): A one dimensional int8 tensor that contains the raw bytes of the
+Tensor[1]: A one dimensional int8 tensor that contains the raw bytes of the
PNG file.
"""
output = torch.ops.image.encode_png(input, compression_level)
......
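A short usage sketch for ``encode_png`` (the random image and output path are illustrative)::

    import torch
    from torchvision.io import encode_png

    image = torch.randint(0, 256, (3, 64, 64), dtype=torch.uint8)  # CHW, uint8
    data = encode_png(image, compression_level=6)  # 1-D tensor of raw PNG bytes
    with open("out.png", "wb") as f:
        f.write(data.numpy().tobytes())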
@@ -253,10 +253,8 @@ def read_video(
Returns:
vframes (Tensor[T, H, W, C]): the `T` video frames
-aframes (Tensor[K, L]): the audio frames, where `K` is the number of channels and `L` is the
-    number of points
-info (Dict): metadata for the video and audio. Can contain the fields video_fps (float)
-    and audio_fps (int)
+aframes (Tensor[K, L]): the audio frames, where `K` is the number of channels and `L` is the number of points
+info (Dict): metadata for the video and audio. Can contain the fields video_fps (float) and audio_fps (int)
"""
from torchvision import get_video_backend
......
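The tidied ``Returns`` section above maps directly to the call below (the file name is a placeholder)::

    from torchvision.io import read_video

    vframes, aframes, info = read_video("clip.mp4", pts_unit="sec")
    print(vframes.shape)           # [T, H, W, C]
    print(info.get("video_fps"))   # float, when the container reports it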
@@ -308,6 +308,7 @@ def fasterrcnn_resnet50_fpn(pretrained=False, progress=True,
During training, the model expects both the input tensors, as well as a targets (list of dictionary),
containing:
+
- boxes (``FloatTensor[N, 4]``): the ground-truth boxes in ``[x1, y1, x2, y2]`` format, with values of ``x``
between ``0`` and ``W`` and values of ``y`` between ``0`` and ``H``
- labels (``Int64Tensor[N]``): the class label for each ground-truth box
@@ -318,6 +319,7 @@
During inference, the model requires only the input tensors, and returns the post-processed
predictions as a ``List[Dict[Tensor]]``, one for each input image. The fields of the ``Dict`` are as
follows:
+
- boxes (``FloatTensor[N, 4]``): the predicted boxes in ``[x1, y1, x2, y2]`` format, with values of ``x``
between ``0`` and ``W`` and values of ``y`` between ``0`` and ``H``
- labels (``Int64Tensor[N]``): the predicted labels for each image
......
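The inference contract spelled out above (and repeated for the detection models below) looks like this in practice; a sketch using random inputs::

    import torch
    import torchvision

    model = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained=True)
    model.eval()                              # inference: input tensors only
    x = [torch.rand(3, 300, 400), torch.rand(3, 500, 400)]
    predictions = model(x)                    # List[Dict] with boxes/labels/scores
    print(predictions[0]["boxes"].shape)      # [N, 4], in [x1, y1, x2, y2]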
@@ -26,6 +26,7 @@ class KeypointRCNN(FasterRCNN):
During training, the model expects both the input tensors, as well as a targets (list of dictionary),
containing:
+
- boxes (FloatTensor[N, 4]): the ground-truth boxes in [x1, y1, x2, y2] format, with values of x
between 0 and W and values of y between 0 and H
- labels (Int64Tensor[N]): the class label for each ground-truth box
@@ -38,6 +39,7 @@ class KeypointRCNN(FasterRCNN):
During inference, the model requires only the input tensors, and returns the post-processed
predictions as a List[Dict[Tensor]], one for each input image. The fields of the Dict are as
follows:
+
- boxes (FloatTensor[N, 4]): the predicted boxes in [x1, y1, x2, y2] format, with values of x
between 0 and W and values of y between 0 and H
- labels (Int64Tensor[N]): the predicted labels for each image
@@ -283,6 +285,7 @@ def keypointrcnn_resnet50_fpn(pretrained=False, progress=True,
During training, the model expects both the input tensors, as well as a targets (list of dictionary),
containing:
+
- boxes (``FloatTensor[N, 4]``): the ground-truth boxes in ``[x1, y1, x2, y2]`` format, with values of ``x``
between ``0`` and ``W`` and values of ``y`` between ``0`` and ``H``
- labels (``Int64Tensor[N]``): the class label for each ground-truth box
@@ -295,6 +298,7 @@ def keypointrcnn_resnet50_fpn(pretrained=False, progress=True,
During inference, the model requires only the input tensors, and returns the post-processed
predictions as a ``List[Dict[Tensor]]``, one for each input image. The fields of the ``Dict`` are as
follows:
+
- boxes (``FloatTensor[N, 4]``): the predicted boxes in ``[x1, y1, x2, y2]`` format, with values of ``x``
between ``0`` and ``W`` and values of ``y`` between ``0`` and ``H``
- labels (``Int64Tensor[N]``): the predicted labels for each image
......
@@ -278,6 +278,7 @@ def maskrcnn_resnet50_fpn(pretrained=False, progress=True,
During training, the model expects both the input tensors, as well as a targets (list of dictionary),
containing:
+
- boxes (``FloatTensor[N, 4]``): the ground-truth boxes in ``[x1, y1, x2, y2]`` format, with values of ``x``
between ``0`` and ``W`` and values of ``y`` between ``0`` and ``H``
- labels (``Int64Tensor[N]``): the class label for each ground-truth box
@@ -289,6 +290,7 @@ def maskrcnn_resnet50_fpn(pretrained=False, progress=True,
During inference, the model requires only the input tensors, and returns the post-processed
predictions as a ``List[Dict[Tensor]]``, one for each input image. The fields of the ``Dict`` are as
follows:
+
- boxes (``FloatTensor[N, 4]``): the predicted boxes in ``[x1, y1, x2, y2]`` format, with values of ``x``
between ``0`` and ``W`` and values of ``y`` between ``0`` and ``H``
- labels (``Int64Tensor[N]``): the predicted labels for each image
......
@@ -575,6 +575,7 @@ def retinanet_resnet50_fpn(pretrained=False, progress=True,
During training, the model expects both the input tensors, as well as a targets (list of dictionary),
containing:
+
- boxes (``FloatTensor[N, 4]``): the ground-truth boxes in ``[x1, y1, x2, y2]`` format, with values
between ``0`` and ``H`` and ``0`` and ``W``
- labels (``Int64Tensor[N]``): the class label for each ground-truth box
@@ -585,6 +586,7 @@ def retinanet_resnet50_fpn(pretrained=False, progress=True,
During inference, the model requires only the input tensors, and returns the post-processed
predictions as a ``List[Dict[Tensor]]``, one for each input image. The fields of the ``Dict`` are as
follows:
+
- boxes (``FloatTensor[N, 4]``): the predicted boxes in ``[x1, y1, x2, y2]`` format, with values between
``0`` and ``H`` and ``0`` and ``W``
- labels (``Int64Tensor[N]``): the predicted labels for each image
......
@@ -982,9 +982,9 @@ def affine(
of length 1: ``[value, ]``.
If input is PIL Image, this option is only available for ``Pillow>=5.0.0``.
fillcolor (sequence, int, float): deprecated argument and will be removed since v0.10.0.
-    Please use `arg`:fill: instead.
+    Please use the ``fill`` parameter instead.
resample (int, optional): deprecated argument and will be removed since v0.10.0.
-    Please use `arg`:interpolation: instead.
+    Please use the ``interpolation`` parameter instead.
Returns:
PIL Image or Tensor: Transformed image.
......
@@ -1179,7 +1179,7 @@ class RandomRotation(torch.nn.Module):
image. If given a number, the value is used for all bands respectively.
If input is PIL Image, this option is only available for ``Pillow>=5.2.0``.
resample (int, optional): deprecated argument and will be removed since v0.10.0.
-    Please use `arg`:interpolation: instead.
+    Please use the ``interpolation`` parameter instead.
.. _filters: https://pillow.readthedocs.io/en/latest/handbook/concepts.html#filters
@@ -1284,9 +1284,9 @@ class RandomAffine(torch.nn.Module):
image. If given a number, the value is used for all bands respectively.
If input is PIL Image, this option is only available for ``Pillow>=5.0.0``.
fillcolor (sequence or number, optional): deprecated argument and will be removed since v0.10.0.
-    Please use `arg`:fill: instead.
+    Please use the ``fill`` parameter instead.
resample (int, optional): deprecated argument and will be removed since v0.10.0.
-    Please use `arg`:interpolation: instead.
+    Please use the ``interpolation`` parameter instead.
.. _filters: https://pillow.readthedocs.io/en/latest/handbook/concepts.html#filters
......
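Since ``fillcolor`` and ``resample`` are deprecated in favour of ``fill`` and ``interpolation``, a sketch of the replacement spelling (assuming a torchvision version where ``InterpolationMode`` is available)::

    from torchvision import transforms
    from torchvision.transforms import InterpolationMode

    t = transforms.RandomAffine(
        degrees=15,
        fill=0,                                   # replaces fillcolor
        interpolation=InterpolationMode.BILINEAR, # replaces resample
    )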