Start doc revamp for semantic segmentation models (#5884)

413b7103 · Nicolas Hug · GitHub · a8f563db · 413b7103 · 413b7103
Unverified Commit 413b7103 authored Apr 27, 2022 by Nicolas Hug Committed by GitHub Apr 27, 2022
5 changed files
--- a/docs/source/conf.py
+++ b/docs/source/conf.py
@@ -376,6 +376,9 @@ def generate_weights_table(module, table_name, metrics):

 generate_weights_table(module=M, table_name="classification", metrics=[("acc@1", "Acc@1"), ("acc@5", "Acc@5")])
 generate_weights_table(module=M.detection, table_name="detection", metrics=[("box_map", "Box MAP")])
+generate_weights_table(
+    module=M.segmentation, table_name="segmentation", metrics=[("miou", "Mean IoU"), ("pixel_acc", "pixelwise Acc")]
+)


 def setup(app):

--- a/docs/source/models/deeplabv3.rst
+++ b/docs/source/models/deeplabv3.rst
+DeepLabV3
+=========
+
+.. currentmodule:: torchvision.models.segmentation
+
+The DeepLabV3 model is based on the `Rethinking Atrous Convolution for Semantic
+Image Segmentation <https://arxiv.org/abs/1706.05587>`__ paper.
+
+
+Model builders
+--------------
+
+The following model builders can be used to instantiate a DeepLabV3 model with
+different backbones, with or without pre-trained weights. All the model builders
+internally rely on the ``torchvision.models.segmentation.deeplabv3.DeepLabV3`` base class. Please
+refer to the `source code
+<https://github.com/pytorch/vision/blob/main/torchvision/models/segmentation/deeplabv3.py>`_
+for more details about this class.
+
+.. autosummary::
+    :toctree: generated/
+    :template: function.rst
+
+    deeplabv3_mobilenet_v3_large
+    deeplabv3_resnet50
+    deeplabv3_resnet101
--- a/docs/source/models_new.rst
+++ b/docs/source/models_new.rst
@@ -56,6 +56,28 @@ Accuracies are reported on ImageNet

 .. include:: generated/classification_table.rst

+Semantic Segmentation
+=====================
+
+.. currentmodule:: torchvision.models.segmentation
+
+The following semantic segmentation models are available, with or without
+pre-trained weights:
+
+.. toctree::
+   :maxdepth: 1
+
+   models/deeplabv3
+
+
+Table of all available semantic segmentation weights
+----------------------------------------------------
+
+All models are evaluated on a subset of COCO val2017, on the 20 categories that are present in the Pascal VOC dataset:
+
+.. include:: generated/segmentation_table.rst
+
+

 Object Detection, Instance Segmentation and Person Keypoint Detection
 =====================================================================

--- a/torchvision/models/detection/retinanet.py
+++ b/torchvision/models/detection/retinanet.py
@@ -775,6 +775,10 @@ def retinanet_resnet50_fpn(
        trainable_backbone_layers (int, optional): number of trainable (not frozen) layers starting from final block.
            Valid values are between 0 and 5, with 5 meaning all backbone layers are trainable. If ``None`` is
            passed (the default) this value is set to 3.
+        **kwargs: parameters passed to the ``torchvision.models.detection.RetinaNet``
+            base class. Please refer to the `source code
+            <https://github.com/pytorch/vision/blob/main/torchvision/models/detection/retinanet.py>`_
+            for more details about this class.

    .. autoclass:: torchvision.models.detection.RetinaNet_ResNet50_FPN_Weights
        :members:
@@ -837,6 +841,10 @@ def retinanet_resnet50_fpn_v2(
        trainable_backbone_layers (int, optional): number of trainable (not frozen) layers starting from final block.
            Valid values are between 0 and 5, with 5 meaning all backbone layers are trainable. If ``None`` is
            passed (the default) this value is set to 3.
+        **kwargs: parameters passed to the ``torchvision.models.detection.RetinaNet``
+            base class. Please refer to the `source code
+            <https://github.com/pytorch/vision/blob/main/torchvision/models/detection/retinanet.py>`_
+            for more details about this class.

    .. autoclass:: torchvision.models.detection.RetinaNet_ResNet50_FPN_V2_Weights
        :members:

--- a/torchvision/models/segmentation/deeplabv3.py
+++ b/torchvision/models/segmentation/deeplabv3.py
@@ -223,12 +223,24 @@ def deeplabv3_resnet50(
 ) -> DeepLabV3:
    """Constructs a DeepLabV3 model with a ResNet-50 backbone.

+    Reference: `Rethinking Atrous Convolution for Semantic Image Segmentation <https://arxiv.org/abs/1706.05587>`__.
+
    Args:
-        weights (DeepLabV3_ResNet50_Weights, optional): The pretrained weights for the model
-        progress (bool): If True, displays a progress bar of the download to stderr
+        weights (:class:`~torchvision.models.segmentation.DeepLabV3_ResNet50_Weights`, optional): The
+            pretrained weights to use. See
+            :class:`~torchvision.models.segmentation.DeepLabV3_ResNet50_Weights` below for
+            more details and possible values. By default, no pre-trained
+            weights are used.
+        progress (bool, optional): If True, displays a progress bar of the
+            download to stderr. Default is True.
        num_classes (int, optional): number of output classes of the model (including the background)
        aux_loss (bool, optional): If True, it uses an auxiliary loss
-        weights_backbone (ResNet50_Weights, optional): The pretrained weights for the backbone
+        weights_backbone (:class:`~torchvision.models.ResNet50_Weights`, optional): The pretrained weights for the
+            backbone
+        **kwargs: unused
+
+    .. autoclass:: torchvision.models.segmentation.DeepLabV3_ResNet50_Weights
+        :members:
    """
    weights = DeepLabV3_ResNet50_Weights.verify(weights)
    weights_backbone = ResNet50_Weights.verify(weights_backbone)
@@ -264,12 +276,24 @@ def deeplabv3_resnet101(
 ) -> DeepLabV3:
    """Constructs a DeepLabV3 model with a ResNet-101 backbone.

+    Reference: `Rethinking Atrous Convolution for Semantic Image Segmentation <https://arxiv.org/abs/1706.05587>`__.
+
    Args:
-        weights (DeepLabV3_ResNet101_Weights, optional): The pretrained weights for the model
-        progress (bool): If True, displays a progress bar of the download to stderr
-        num_classes (int): The number of classes
-        aux_loss (bool, optional): If True, include an auxiliary classifier
-        weights_backbone (ResNet101_Weights, optional): The pretrained weights for the backbone
+        weights (:class:`~torchvision.models.segmentation.DeepLabV3_ResNet101_Weights`, optional): The
+            pretrained weights to use. See
+            :class:`~torchvision.models.segmentation.DeepLabV3_ResNet101_Weights` below for
+            more details and possible values. By default, no pre-trained
+            weights are used.
+        progress (bool, optional): If True, displays a progress bar of the
+            download to stderr. Default is True.
+        num_classes (int, optional): number of output classes of the model (including the background)
+        aux_loss (bool, optional): If True, it uses an auxiliary loss
+        weights_backbone (:class:`~torchvision.models.ResNet101_Weights`, optional): The pretrained weights for the
+            backbone
+        **kwargs: unused
+
+    .. autoclass:: torchvision.models.segmentation.DeepLabV3_ResNet101_Weights
+        :members:
    """
    weights = DeepLabV3_ResNet101_Weights.verify(weights)
    weights_backbone = ResNet101_Weights.verify(weights_backbone)
@@ -305,12 +329,24 @@ def deeplabv3_mobilenet_v3_large(
 ) -> DeepLabV3:
    """Constructs a DeepLabV3 model with a MobileNetV3-Large backbone.

+    Reference: `Rethinking Atrous Convolution for Semantic Image Segmentation <https://arxiv.org/abs/1706.05587>`__.
+
    Args:
-        weights (DeepLabV3_MobileNet_V3_Large_Weights, optional): The pretrained weights for the model
-        progress (bool): If True, displays a progress bar of the download to stderr
+        weights (:class:`~torchvision.models.segmentation.DeepLabV3_MobileNet_V3_Large_Weights`, optional): The
+            pretrained weights to use. See
+            :class:`~torchvision.models.segmentation.DeepLabV3_MobileNet_V3_Large_Weights` below for
+            more details and possible values. By default, no pre-trained
+            weights are used.
+        progress (bool, optional): If True, displays a progress bar of the
+            download to stderr. Default is True.
        num_classes (int, optional): number of output classes of the model (including the background)
        aux_loss (bool, optional): If True, it uses an auxiliary loss
-        weights_backbone (MobileNet_V3_Large_Weights, optional): The pretrained weights for the backbone
+        weights_backbone (:class:`~torchvision.models.MobileNet_V3_Large_Weights`, optional): The pretrained weights
+            for the backbone
+        **kwargs: unused
+
+    .. autoclass:: torchvision.models.segmentation.DeepLabV3_MobileNet_V3_Large_Weights
+        :members:
    """
    weights = DeepLabV3_MobileNet_V3_Large_Weights.verify(weights)
    weights_backbone = MobileNet_V3_Large_Weights.verify(weights_backbone)