Commit cc26cd81 authored by panning

merge v0.16.0

parents f78f29f5 fbb4cc54
......@@ -14,7 +14,7 @@ allprojects {
androidSupportAppCompatV7Version = "28.0.0"
fbjniJavaOnlyVersion = "0.0.3"
soLoaderNativeLoaderVersion = "0.10.4"
soLoaderNativeLoaderVersion = "0.10.5"
pytorchAndroidVersion = "1.12"
}
......
ABI_FILTERS=armeabi-v7a,arm64-v8a,x86,x86_64
VERSION_NAME=0.14.0-SNAPSHOT
VERSION_NAME=0.15.0-SNAPSHOT
GROUP=org.pytorch
MAVEN_GROUP=org.pytorch
SONATYPE_STAGING_PROFILE=orgpytorch
......
cmake_minimum_required(VERSION 3.4.1)
set(TARGET torchvision_ops)
project(${TARGET} CXX)
set(CMAKE_CXX_STANDARD 14)
set(CMAKE_CXX_STANDARD 17)
string(APPEND CMAKE_CXX_FLAGS " -DMOBILE")
......
......@@ -10,11 +10,11 @@
# SIMULATOR - used to build for the Simulator platforms, which have an x86 arch.
#
# CMAKE_IOS_DEVELOPER_ROOT = automatic(default) or /path/to/platform/Developer folder
# By default this location is automatcially chosen based on the IOS_PLATFORM value above.
# By default this location is automatically chosen based on the IOS_PLATFORM value above.
# If set manually, it will override the default location and force the use of a particular Developer Platform
#
# CMAKE_IOS_SDK_ROOT = automatic(default) or /path/to/platform/Developer/SDKs/SDK folder
# By default this location is automatcially chosen based on the CMAKE_IOS_DEVELOPER_ROOT value.
# By default this location is automatically chosen based on the CMAKE_IOS_DEVELOPER_ROOT value.
# In this case it will always be the most up-to-date SDK found in the CMAKE_IOS_DEVELOPER_ROOT path.
# If set manually, this will force the use of a specific SDK version
......@@ -100,7 +100,7 @@ if(IOS_DEPLOYMENT_TARGET)
set(XCODE_IOS_PLATFORM_VERSION_FLAGS "-m${XCODE_IOS_PLATFORM}-version-min=${IOS_DEPLOYMENT_TARGET}")
endif()
# Hidden visibilty is required for cxx on iOS
# Hidden visibility is required for cxx on iOS
set(CMAKE_C_FLAGS_INIT "${XCODE_IOS_PLATFORM_VERSION_FLAGS}")
set(CMAKE_CXX_FLAGS_INIT "${XCODE_IOS_PLATFORM_VERSION_FLAGS} -fvisibility-inlines-hidden")
......
......@@ -33,6 +33,7 @@ clean:
rm -rf $(SOURCEDIR)/auto_examples/ # sphinx-gallery
rm -rf $(SOURCEDIR)/gen_modules/ # sphinx-gallery
rm -rf $(SOURCEDIR)/generated/ # autosummary
rm -rf $(SOURCEDIR)/models/generated # autosummary
.PHONY: help Makefile docset
......
......@@ -5,3 +5,4 @@ sphinx-gallery>=0.11.1
sphinx==5.0.0
tabulate
-e git+https://github.com/pytorch/pytorch_sphinx_theme.git#egg=pytorch_sphinx_theme
pycocotools
......@@ -4,15 +4,26 @@ from docutils.parsers.rst import Directive
class BetaStatus(Directive):
has_content = True
text = "The {api_name} is in Beta stage, and backward compatibility is not guaranteed."
node = nodes.warning
def run(self):
api_name = " ".join(self.content)
text = f"The {api_name} is in Beta stage, and backward compatibility is not guaranteed."
return [nodes.warning("", nodes.paragraph("", "", nodes.Text(text)))]
text = self.text.format(api_name=" ".join(self.content))
return [self.node("", nodes.paragraph("", "", nodes.Text(text)))]
class V2BetaStatus(BetaStatus):
text = (
"The {api_name} is in Beta stage, and while we do not expect disruptive breaking changes, "
"some APIs may slightly change according to user feedback. Please submit any feedback you may have "
"in this issue: https://github.com/pytorch/vision/issues/6753."
)
node = nodes.note
def setup(app):
app.add_directive("betastatus", BetaStatus)
app.add_directive("v2betastatus", V2BetaStatus)
return {
"version": "0.1",
"parallel_read_safe": True,
......
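For orientation, a minimal sketch (not part of this commit) of what the refactor above enables: a new status directive only needs to override the `text` and `node` class attributes, assuming the `BetaStatus` class from the diff is importable.

.. code:: python

    # Hypothetical subclass for illustration only; "ExperimentalStatus" is an invented name.
    from docutils import nodes

    class ExperimentalStatus(BetaStatus):
        text = "The {api_name} is experimental and may change without notice."
        node = nodes.note

    # It would be registered the same way as the other directives, e.g.:
    # app.add_directive("experimentalstatus", ExperimentalStatus)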
......@@ -29,6 +29,7 @@ from pathlib import Path
import pytorch_sphinx_theme
import torchvision
import torchvision.models as M
from sphinx_gallery.sorting import ExplicitOrder
from tabulate import tabulate
sys.path.append(os.path.abspath("."))
......@@ -55,11 +56,65 @@ extensions = [
"beta_status",
]
# We override sphinx-gallery's example header to prevent sphinx-gallery from
# creating a note at the top of the rendered notebook.
# https://github.com/sphinx-gallery/sphinx-gallery/blob/451ccba1007cc523f39cbcc960ebc21ca39f7b75/sphinx_gallery/gen_rst.py#L1267-L1271
# This is because we also want to add a link to Google Colab, so we write our own note in each example.
from sphinx_gallery import gen_rst
gen_rst.EXAMPLE_HEADER = """
.. DO NOT EDIT.
.. THIS FILE WAS AUTOMATICALLY GENERATED BY SPHINX-GALLERY.
.. TO MAKE CHANGES, EDIT THE SOURCE PYTHON FILE:
.. "{0}"
.. LINE NUMBERS ARE GIVEN BELOW.
.. rst-class:: sphx-glr-example-title
.. _sphx_glr_{1}:
"""
class CustomGalleryExampleSortKey:
# See https://sphinx-gallery.github.io/stable/configuration.html#sorting-gallery-examples
# and https://github.com/sphinx-gallery/sphinx-gallery/blob/master/sphinx_gallery/sorting.py
def __init__(self, src_dir):
self.src_dir = src_dir
transforms_subsection_order = [
"plot_transforms_getting_started.py",
"plot_transforms_illustrations.py",
"plot_transforms_e2e.py",
"plot_cutmix_mixup.py",
"plot_custom_transforms.py",
"plot_tv_tensors.py",
"plot_custom_tv_tensors.py",
]
def __call__(self, filename):
if "gallery/transforms" in self.src_dir:
try:
return self.transforms_subsection_order.index(filename)
except ValueError as e:
raise ValueError(
"Looks like you added an example in gallery/transforms? "
"You need to specify its order in docs/source/conf.py. Look for CustomGalleryExampleSortKey."
) from e
else:
# For other subsections we just sort alphabetically by filename
return filename
sphinx_gallery_conf = {
"examples_dirs": "../../gallery/", # path to your example scripts
"gallery_dirs": "auto_examples", # path to where to save gallery generated output
"subsection_order": ExplicitOrder(["../../gallery/transforms", "../../gallery/others"]),
"backreferences_dir": "gen_modules/backreferences",
"doc_module": ("torchvision",),
"remove_config_comments": True,
"ignore_pattern": "helpers.py",
"within_subsection_order": CustomGalleryExampleSortKey,
}
napoleon_use_ivar = True
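Roughly speaking (this is an assumption about sphinx-gallery internals, not code from this commit), the key above is constructed with a subsection's source directory and then called once per example filename to sort that subsection:

.. code:: python

    # Sketch only: the transforms subsection is sorted by the explicit order above.
    key = CustomGalleryExampleSortKey("../../gallery/transforms")
    files = ["plot_cutmix_mixup.py", "plot_transforms_getting_started.py"]
    sorted(files, key=key)
    # -> ["plot_transforms_getting_started.py", "plot_cutmix_mixup.py"]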
......@@ -88,17 +143,15 @@ author = "Torch Contributors"
# The version info for the project you're documenting, acts as replacement for
# |version| and |release|, also used in various other places throughout the
# built documents.
#
# The short X.Y version.
version = "main (" + torchvision.__version__ + " )"
# The full version, including alpha/beta/rc tags.
release = "main"
VERSION = os.environ.get("VERSION", None)
if VERSION:
# version: The short X.Y version.
# release: The full version, including alpha/beta/rc tags.
if os.environ.get("TORCHVISION_SANITIZE_VERSION_STR_IN_DOCS", None):
# Turn 1.11.0aHASH into 1.11 (major.minor only)
version = ".".join(version.split(".")[:2])
version = release = ".".join(torchvision.__version__.split(".")[:2])
html_title = " ".join((project, version, "documentation"))
release = version
else:
version = f"main ({torchvision.__version__})"
release = "main"
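As a small worked example of the sanitization above (the version string below is made up):

.. code:: python

    raw = "0.16.0a0+git1234567"  # stand-in for torchvision.__version__ on a nightly build
    version = release = ".".join(raw.split(".")[:2])  # -> "0.16"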
# The language for content autogenerated by Sphinx. Refer to documentation
......@@ -138,7 +191,7 @@ html_theme_options = {
"logo_only": True,
"pytorch_project": "docs",
"navigation_with_keys": True,
"analytics_id": "UA-117752657-2",
"analytics_id": "GTM-T8XT4PS",
}
html_logo = "_static/img/pytorch-logo-dark.svg"
......@@ -318,7 +371,7 @@ def inject_weight_metadata(app, what, name, obj, options, lines):
used within the autoclass directive.
"""
if obj.__name__.endswith(("_Weights", "_QuantizedWeights")):
if getattr(obj, "__name__", "").endswith(("_Weights", "_QuantizedWeights")):
if len(obj) == 0:
lines[:] = ["There are no available pre-trained weights."]
......@@ -331,7 +384,7 @@ def inject_weight_metadata(app, what, name, obj, options, lines):
]
if obj.__doc__ != "An enumeration.":
# We only show the custom enum doc if it was overriden. The default one from Python is "An enumeration"
# We only show the custom enum doc if it was overridden. The default one from Python is "An enumeration"
lines.append("")
lines.append(obj.__doc__)
......@@ -362,6 +415,13 @@ def inject_weight_metadata(app, what, name, obj, options, lines):
max_visible = 3
v_sample = ", ".join(v[:max_visible])
v = f"{v_sample}, ... ({len(v)-max_visible} omitted)" if len(v) > max_visible else v_sample
elif k == "_ops":
v = f"{v:.2f}"
k = "GIPS" if obj.__name__.endswith("_QuantizedWeights") else "GFLOPS"
elif k == "_file_size":
k = "File size"
v = f"{v:.1f} MB"
table.append((str(k), str(v)))
table = tabulate(table, tablefmt="rst")
lines += [".. rst-class:: table-weights"] # Custom CSS class, see custom_torchvision.css
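In other words, the two new branches above turn the raw meta entries into human-readable rows; a tiny sketch with made-up values:

.. code:: python

    meta = {"_ops": 4.09, "_file_size": 97.8}  # made-up example values
    ops_row = ("GFLOPS", f"{meta['_ops']:.2f}")            # "GIPS" for *_QuantizedWeights enums
    size_row = ("File size", f"{meta['_file_size']:.1f} MB")
    # ops_row == ("GFLOPS", "4.09"), size_row == ("File size", "97.8 MB")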
......@@ -385,19 +445,27 @@ def generate_weights_table(module, table_name, metrics, dataset, include_pattern
if exclude_patterns is not None:
weights = [w for w in weights if all(p not in str(w) for p in exclude_patterns)]
ops_name = "GIPS" if "QuantizedWeights" in weights_endswith else "GFLOPS"
metrics_keys, metrics_names = zip(*metrics)
column_names = ["Weight"] + list(metrics_names) + ["Params", "Recipe"]
column_names = ["Weight"] + list(metrics_names) + ["Params"] + [ops_name, "Recipe"] # Final column order
column_names = [f"**{name}**" for name in column_names] # Add bold
content = [
(
content = []
for w in weights:
row = [
f":class:`{w} <{type(w).__name__}>`",
*(w.meta["_metrics"][dataset][metric] for metric in metrics_keys),
f"{w.meta['num_params']/1e6:.1f}M",
f"{w.meta['_ops']:.2f}",
f"`link <{w.meta['recipe']}>`__",
)
for w in weights
]
]
content.append(row)
column_widths = ["110"] + ["18"] * len(metrics_names) + ["18"] * 2 + ["10"]
widths_table = " ".join(column_widths)
table = tabulate(content, headers=column_names, tablefmt="rst")
generated_dir = Path("generated")
......@@ -405,7 +473,7 @@ def generate_weights_table(module, table_name, metrics, dataset, include_pattern
with open(generated_dir / f"{table_name}_table.rst", "w+") as table_file:
table_file.write(".. rst-class:: table-weights\n") # Custom CSS class, see custom_torchvision.css
table_file.write(".. table::\n")
table_file.write(f" :widths: 100 {'20 ' * len(metrics_names)} 20 10\n\n")
table_file.write(f" :widths: {widths_table} \n\n")
table_file.write(f"{textwrap.indent(table, ' ' * 4)}\n\n")
......
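For reference, a minimal sketch of how `tabulate` renders the per-weight rows built above into an rst grid table (the weight name and numbers below are placeholders, not generated output):

.. code:: python

    from tabulate import tabulate

    column_names = ["**Weight**", "**Acc@1**", "**Params**", "**GFLOPS**", "**Recipe**"]
    content = [["ResNet50_Weights.IMAGENET1K_V2", "80.858", "25.6M", "4.09", "`link <...>`__"]]
    print(tabulate(content, headers=column_names, tablefmt="rst"))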
.. _datasets:
Datasets
========
......@@ -80,7 +82,6 @@ Image detection or segmentation
CocoDetection
CelebA
Cityscapes
GTSRB
Kitti
OxfordIIITPet
SBDataset
......@@ -149,6 +150,14 @@ Video classification
Kinetics
UCF101
Video prediction
~~~~~~~~~~~~~~~~~~~~
.. autosummary::
:toctree: generated/
:template: class_dataset.rst
MovingMNIST
.. _base_classes_datasets:
......@@ -162,3 +171,12 @@ Base classes for custom datasets
DatasetFolder
ImageFolder
VisionDataset
Transforms v2
-------------
.. autosummary::
:toctree: generated/
:template: function.rst
wrap_dataset_for_transforms_v2
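A minimal usage sketch of the wrapper listed above (the paths are placeholders):

.. code:: python

    import torchvision
    from torchvision.datasets import wrap_dataset_for_transforms_v2

    dataset = torchvision.datasets.CocoDetection("path/to/images", "path/to/annotations.json")
    # The wrapped dataset returns samples in a format that torchvision.transforms.v2 understands.
    dataset = wrap_dataset_for_transforms_v2(dataset)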
......@@ -32,6 +32,7 @@ architectures, and common image transformations for computer vision.
:caption: Package Reference
transforms
tv_tensors
models
datasets
utils
......
Reading/Writing images and videos
=================================
Decoding / Encoding images and videos
=====================================
.. currentmodule:: torchvision.io
The :mod:`torchvision.io` package provides functions for performing IO
operations. They are currently specific to reading and writing video and
images.
operations. They are currently specific to reading and writing images and
videos.
Images
------
.. autosummary::
:toctree: generated/
:template: function.rst
read_image
decode_image
encode_jpeg
decode_jpeg
write_jpeg
encode_png
decode_png
write_png
read_file
write_file
.. autosummary::
:toctree: generated/
:template: class.rst
ImageReadMode
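A minimal sketch of the image functions listed above (file paths are placeholders):

.. code:: python

    from torchvision.io import ImageReadMode, read_image, write_jpeg

    img = read_image("path/to/input.png", mode=ImageReadMode.RGB)  # uint8 CHW tensor
    write_jpeg(img, "path/to/output.jpg", quality=90)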
Video
-----
......@@ -20,7 +46,7 @@ Video
Fine-grained video API
----------------------
^^^^^^^^^^^^^^^^^^^^^^
In addition to the :func:`read_video` function, we provide a high-performance
lower-level API that gives more fine-grained control than :func:`read_video` does.
......@@ -61,28 +87,3 @@ Example of inspecting a video:
# the constructor we select a default video stream, but
# in practice, we can set whichever stream we would like
video.set_current_stream("video:0")
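A minimal sketch of that fine-grained API (the path is a placeholder, and torchvision must be built with video support):

.. code:: python

    from torchvision.io import VideoReader

    video = VideoReader("path/to/video.mp4", "video")
    video.set_current_stream("video:0")
    for frame in video:
        pass  # frame["data"] holds the decoded tensor, frame["pts"] its presentation timestamp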
Image
-----
.. autosummary::
:toctree: generated/
:template: class.rst
ImageReadMode
.. autosummary::
:toctree: generated/
:template: function.rst
read_image
decode_image
encode_jpeg
decode_jpeg
write_jpeg
encode_png
decode_png
write_png
read_file
write_file
......@@ -120,13 +120,12 @@ behavior, such as batch normalization. To switch between these modes, use
# Set model to eval mode
model.eval()
Model Registration Mechanism
----------------------------
.. betastatus:: registration mechanism
Listing and retrieving available models
---------------------------------------
As of v0.14, TorchVision offers a new model registration mechanism which allows retreaving models
and weights by their names. Here are a few examples on how to use them:
As of v0.14, TorchVision offers a new mechanism which allows listing and
retrieving models and weights by their names. Here are a few examples on how to
use them:
.. code:: python
......@@ -148,7 +147,7 @@ and weights by their names. Here are a few examples on how to use them:
weights_enum2 = get_model_weights(torchvision.models.quantization.mobilenet_v3_large)
assert weights_enum == weights_enum2
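In the spirit of the examples referenced above, a short sketch of the listing and retrieval functions (the model and weight names are just illustrative choices):

.. code:: python

    import torchvision
    from torchvision.models import get_model, get_weight, list_models

    classification_models = list_models(module=torchvision.models)
    model = get_model("mobilenet_v3_large", weights="DEFAULT")
    weights = get_weight("MobileNet_V3_Large_Weights.IMAGENET1K_V2")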
Here are the available public methods of the model registration mechanism:
Here are the available public functions to retrieve models and their corresponding weights:
.. currentmodule:: torchvision.models
.. autosummary::
......@@ -518,6 +517,7 @@ pre-trained weights:
models/video_mvit
models/video_resnet
models/video_s3d
models/video_swin_transformer
|
......
......@@ -14,7 +14,7 @@ and is based on `One weird trick for parallelizing convolutional neural networks
Model builders
--------------
The following model builders can be used to instanciate an AlexNet model, with or
The following model builders can be used to instantiate an AlexNet model, with or
without pre-trained weights. All the model builders internally rely on the
``torchvision.models.alexnet.AlexNet`` base class. Please refer to the `source
code
......
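For illustration (not part of this diff), instantiating the builder with and without pre-trained weights:

.. code:: python

    from torchvision.models import alexnet, AlexNet_Weights

    model = alexnet(weights=AlexNet_Weights.IMAGENET1K_V1)  # pre-trained
    model_untrained = alexnet(weights=None)                 # randomly initialized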
......@@ -10,7 +10,7 @@ paper.
Model builders
--------------
The following model builders can be used to instanciate an EfficientNet model, with or
The following model builders can be used to instantiate an EfficientNet model, with or
without pre-trained weights. All the model builders internally rely on the
``torchvision.models.efficientnet.EfficientNet`` base class. Please refer to the `source
code
......
......@@ -10,7 +10,7 @@ paper.
Model builders
--------------
The following model builders can be used to instanciate an EfficientNetV2 model, with or
The following model builders can be used to instantiate an EfficientNetV2 model, with or
without pre-trained weights. All the model builders internally rely on the
``torchvision.models.efficientnet.EfficientNet`` base class. Please refer to the `source
code
......
......@@ -3,7 +3,7 @@ FCOS
.. currentmodule:: torchvision.models.detection
The RetinaNet model is based on the `FCOS: Fully Convolutional One-Stage Object Detection
The FCOS model is based on the `FCOS: Fully Convolutional One-Stage Object Detection
<https://arxiv.org/abs/1904.01355>`__ paper.
.. betastatus:: detection module
......@@ -12,7 +12,7 @@ Model builders
--------------
The following model builders can be used to instantiate a FCOS model, with or
without pre-trained weights. All the model buidlers internally rely on the
without pre-trained weights. All the model builders internally rely on the
``torchvision.models.detection.fcos.FCOS`` base class. Please refer to the `source code
<https://github.com/pytorch/vision/blob/main/torchvision/models/detection/fcos.py>`_ for
more details about this class.
......
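Likewise, a short sketch (not part of this diff) of instantiating the detection builder described above:

.. code:: python

    from torchvision.models.detection import FCOS_ResNet50_FPN_Weights, fcos_resnet50_fpn

    model = fcos_resnet50_fpn(weights=FCOS_ResNet50_FPN_Weights.DEFAULT)
    model.eval()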
......@@ -10,7 +10,7 @@ paper.
Model builders
--------------
The following model builders can be used to instanciate a GoogLeNet model, with or
The following model builders can be used to instantiate a GoogLeNet model, with or
without pre-trained weights. All the model builders internally rely on the
``torchvision.models.googlenet.GoogLeNet`` base class. Please refer to the `source
code
......
......@@ -10,7 +10,7 @@ paper.
Model builders
--------------
The following model builders can be used to instanciate a quantized GoogLeNet
The following model builders can be used to instantiate a quantized GoogLeNet
model, with or without pre-trained weights. All the model builders internally
rely on the ``torchvision.models.quantization.googlenet.QuantizableGoogLeNet``
base class. Please refer to the `source code
......
......@@ -10,7 +10,7 @@ Computer Vision <https://arxiv.org/abs/1512.00567>`__ paper.
Model builders
--------------
The following model builders can be used to instanciate an InceptionV3 model, with or
The following model builders can be used to instantiate an InceptionV3 model, with or
without pre-trained weights. All the model builders internally rely on the
``torchvision.models.inception.Inception3`` base class. Please refer to the `source
code <https://github.com/pytorch/vision/blob/main/torchvision/models/inception.py>`_ for
......
......@@ -10,7 +10,7 @@ Computer Vision <https://arxiv.org/abs/1512.00567>`__ paper.
Model builders
--------------
The following model builders can be used to instanciate a quantized Inception
The following model builders can be used to instantiate a quantized Inception
model, with or without pre-trained weights. All the model builders internally
rely on the ``torchvision.models.quantization.inception.QuantizableInception3``
base class. Please refer to the `source code
......