Unverified commit b52f2331, authored by Vasilis Vryniotis and committed by GitHub

Document all remaining pre-trained weights (#6039)

* Adding docs for quantized models.

* Adding docs for video models.

* Adding docs for segmentation models.

* Adding docs for optical flow models.

* Adding docs for detection models.

* Fix typo.

* Make changes from code-review.
parent ae1d7071
@@ -345,9 +345,7 @@ def inject_weight_metadata(app, what, name, obj, options, lines):
         metrics = meta.pop("metrics", {})
         meta_with_metrics = dict(meta, **metrics)
-        custom_docs = meta_with_metrics.pop("_docs", None)  # Custom per-Weights docs
-        if custom_docs is not None:
-            lines += [custom_docs]
+        lines += [meta_with_metrics.pop("_docs")]
         if field == obj.DEFAULT:
             lines += [f"Also available as ``{obj.__name__}.DEFAULT``."]
...
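With "_docs" now mandatory on every weight entry, the Sphinx hook can pop the field unconditionally instead of checking for None. Outside the docs build, the same text is reachable from the enum's metadata; a minimal sketch (not part of this commit), assuming torchvision's multi-weight API (v0.13+):

# Reading the per-weights documentation string directly from the enum.
from torchvision.models.detection import FasterRCNN_ResNet50_FPN_Weights

weights = FasterRCNN_ResNet50_FPN_Weights.COCO_V1
# "_docs" holds the human-readable description that the Sphinx hook appends.
print(weights.meta["_docs"])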
@@ -95,8 +95,8 @@ def test_schema_meta_validation(model_fn):
     # mandatory fields for each computer vision task
     classification_fields = {"categories", ("metrics", "acc@1"), ("metrics", "acc@5")}
     defaults = {
-        "all": {"metrics", "min_size", "num_params", "recipe"},
-        "models": classification_fields | {"_docs"},
+        "all": {"metrics", "min_size", "num_params", "recipe", "_docs"},
+        "models": classification_fields,
         "detection": {"categories", ("metrics", "box_map")},
         "quantization": classification_fields | {"backend", "unquantized"},
         "segmentation": {"categories", ("metrics", "miou"), ("metrics", "pixel_acc")},
...
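The schema test now requires "_docs" for every task, not just classification. A minimal sketch of the same invariant with a hypothetical helper (not the actual test code):

# Hypothetical check: every entry of a WeightsEnum must carry a "_docs" string.
def assert_docs_present(weights_enum):
    for weight in weights_enum:
        assert isinstance(weight.meta.get("_docs"), str), f"{weight} is missing '_docs'"

from torchvision.models.detection import FasterRCNN_ResNet50_FPN_Weights
assert_docs_present(FasterRCNN_ResNet50_FPN_Weights)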
@@ -386,6 +386,7 @@ class FasterRCNN_ResNet50_FPN_Weights(WeightsEnum):
             "metrics": {
                 "box_map": 37.0,
             },
+            "_docs": """These weights were produced by following a similar training recipe as on the paper.""",
         },
     )
     DEFAULT = COCO_V1
@@ -402,6 +403,7 @@ class FasterRCNN_ResNet50_FPN_V2_Weights(WeightsEnum):
             "metrics": {
                 "box_map": 46.7,
             },
+            "_docs": """These weights were produced using an enhanced training recipe to boost the model accuracy.""",
         },
     )
     DEFAULT = COCO_V1
@@ -418,6 +420,7 @@ class FasterRCNN_MobileNet_V3_Large_FPN_Weights(WeightsEnum):
             "metrics": {
                 "box_map": 32.8,
             },
+            "_docs": """These weights were produced by following a similar training recipe as on the paper.""",
         },
     )
     DEFAULT = COCO_V1
@@ -434,6 +437,7 @@ class FasterRCNN_MobileNet_V3_Large_320_FPN_Weights(WeightsEnum):
             "metrics": {
                 "box_map": 22.8,
             },
+            "_docs": """These weights were produced by following a similar training recipe as on the paper.""",
         },
     )
     DEFAULT = COCO_V1
@@ -454,7 +458,7 @@ def fasterrcnn_resnet50_fpn(
 ) -> FasterRCNN:
     """
     Faster R-CNN model with a ResNet-50-FPN backbone from the `Faster R-CNN: Towards Real-Time Object
-    Detection with Region Proposal Networks <https://arxiv.org/abs/1703.06870>`__
+    Detection with Region Proposal Networks <https://arxiv.org/abs/1506.01497>`__
     paper.
     The input to the model is expected to be a list of tensors, each of shape ``[C, H, W]``, one for each
...
@@ -661,6 +661,7 @@ class FCOS_ResNet50_FPN_Weights(WeightsEnum):
             "metrics": {
                 "box_map": 39.2,
             },
+            "_docs": """These weights were produced by following a similar training recipe as on the paper.""",
         },
     )
     DEFAULT = COCO_V1
...
@@ -326,6 +326,10 @@ class KeypointRCNN_ResNet50_FPN_Weights(WeightsEnum):
                 "box_map": 50.6,
                 "kp_map": 61.1,
             },
+            "_docs": """
+                These weights were produced by following a similar training recipe as on the paper but use a checkpoint
+                from an early epoch.
+            """,
         },
     )
     COCO_V1 = Weights(
@@ -339,6 +343,7 @@ class KeypointRCNN_ResNet50_FPN_Weights(WeightsEnum):
                 "box_map": 54.6,
                 "kp_map": 65.0,
             },
+            "_docs": """These weights were produced by following a similar training recipe as on the paper.""",
         },
     )
     DEFAULT = COCO_V1
...
@@ -368,6 +368,7 @@ class MaskRCNN_ResNet50_FPN_Weights(WeightsEnum):
                 "box_map": 37.9,
                 "mask_map": 34.6,
             },
+            "_docs": """These weights were produced by following a similar training recipe as on the paper.""",
         },
     )
     DEFAULT = COCO_V1
@@ -385,6 +386,7 @@ class MaskRCNN_ResNet50_FPN_V2_Weights(WeightsEnum):
                 "box_map": 47.4,
                 "mask_map": 41.8,
             },
+            "_docs": """These weights were produced using an enhanced training recipe to boost the model accuracy.""",
         },
     )
     DEFAULT = COCO_V1
...
@@ -690,6 +690,7 @@ class RetinaNet_ResNet50_FPN_Weights(WeightsEnum):
             "metrics": {
                 "box_map": 36.4,
             },
+            "_docs": """These weights were produced by following a similar training recipe as on the paper.""",
         },
     )
     DEFAULT = COCO_V1
@@ -706,6 +707,7 @@ class RetinaNet_ResNet50_FPN_V2_Weights(WeightsEnum):
             "metrics": {
                 "box_map": 41.5,
             },
+            "_docs": """These weights were produced using an enhanced training recipe to boost the model accuracy.""",
         },
     )
     DEFAULT = COCO_V1
...
@@ -37,6 +37,7 @@ class SSD300_VGG16_Weights(WeightsEnum):
             "metrics": {
                 "box_map": 25.1,
             },
+            "_docs": """These weights were produced by following a similar training recipe as on the paper.""",
         },
     )
     DEFAULT = COCO_V1
...
@@ -196,6 +196,7 @@ class SSDLite320_MobileNet_V3_Large_Weights(WeightsEnum):
             "metrics": {
                 "box_map": 21.3,
             },
+            "_docs": """These weights were produced by following a similar training recipe as on the paper.""",
         },
     )
     DEFAULT = COCO_V1
...
@@ -518,7 +518,7 @@ _COMMON_META = {
 class Raft_Large_Weights(WeightsEnum):
     C_T_V1 = Weights(
-        # Chairs + Things, ported from original paper repo (raft-things.pth)
+        # Weights ported from https://github.com/princeton-vl/RAFT
         url="https://download.pytorch.org/models/raft_large_C_T_V1-22a6c225.pth",
         transforms=OpticalFlow,
         meta={
@@ -531,11 +531,11 @@ class Raft_Large_Weights(WeightsEnum):
                 "kitti_train_per_image_epe": 5.0172,
                 "kitti_train_fl_all": 17.4506,
             },
+            "_docs": """These weights were ported from the original paper. They are trained on Chairs + Things.""",
         },
     )
     C_T_V2 = Weights(
-        # Chairs + Things
         url="https://download.pytorch.org/models/raft_large_C_T_V2-1bb1363a.pth",
         transforms=OpticalFlow,
         meta={
@@ -548,11 +548,12 @@ class Raft_Large_Weights(WeightsEnum):
                 "kitti_train_per_image_epe": 4.5118,
                 "kitti_train_fl_all": 16.0679,
             },
+            "_docs": """These weights were trained from scratch on Chairs + Things.""",
         },
     )
     C_T_SKHT_V1 = Weights(
-        # Chairs + Things + Sintel fine-tuning, ported from original paper repo (raft-sintel.pth)
+        # Weights ported from https://github.com/princeton-vl/RAFT
         url="https://download.pytorch.org/models/raft_large_C_T_SKHT_V1-0b8c9e55.pth",
         transforms=OpticalFlow,
         meta={
@@ -563,13 +564,14 @@ class Raft_Large_Weights(WeightsEnum):
                 "sintel_test_cleanpass_epe": 1.94,
                 "sintel_test_finalpass_epe": 3.18,
             },
+            "_docs": """
+                These weights were ported from the original paper. They are trained on Chairs + Things and fine-tuned on
+                Sintel (C+T+S+K+H).
+            """,
         },
     )
     C_T_SKHT_V2 = Weights(
-        # Chairs + Things + Sintel fine-tuning, i.e.:
-        # Chairs + Things + (Sintel + Kitti + HD1K + Things_clean)
-        # Corresponds to the C+T+S+K+H on paper with fine-tuning on Sintel
         url="https://download.pytorch.org/models/raft_large_C_T_SKHT_V2-ff5fadd5.pth",
         transforms=OpticalFlow,
         meta={
@@ -580,11 +582,14 @@ class Raft_Large_Weights(WeightsEnum):
                 "sintel_test_cleanpass_epe": 1.819,
                 "sintel_test_finalpass_epe": 3.067,
             },
+            "_docs": """
+                These weights were trained from scratch on Chairs + Things and fine-tuned on Sintel (C+T+S+K+H).
+            """,
         },
     )
     C_T_SKHT_K_V1 = Weights(
-        # Chairs + Things + Sintel fine-tuning + Kitti fine-tuning, ported from the original repo (sintel-kitti.pth)
+        # Weights ported from https://github.com/princeton-vl/RAFT
         url="https://download.pytorch.org/models/raft_large_C_T_SKHT_K_V1-4a6a5039.pth",
         transforms=OpticalFlow,
         meta={
@@ -594,14 +599,14 @@ class Raft_Large_Weights(WeightsEnum):
             "metrics": {
                 "kitti_test_fl_all": 5.10,
             },
+            "_docs": """
+                These weights were ported from the original paper. They are trained on Chairs + Things, fine-tuned on
+                Sintel and then on Kitti.
+            """,
         },
     )
     C_T_SKHT_K_V2 = Weights(
-        # Chairs + Things + Sintel fine-tuning + Kitti fine-tuning i.e.:
-        # Chairs + Things + (Sintel + Kitti + HD1K + Things_clean) + Kitti
-        # Same as CT_SKHT with extra fine-tuning on Kitti
-        # Corresponds to the C+T+S+K+H on paper with fine-tuning on Sintel and then on Kitti
         url="https://download.pytorch.org/models/raft_large_C_T_SKHT_K_V2-b5c70766.pth",
         transforms=OpticalFlow,
         meta={
@@ -611,6 +616,9 @@ class Raft_Large_Weights(WeightsEnum):
             "metrics": {
                 "kitti_test_fl_all": 5.19,
             },
+            "_docs": """
+                These weights were trained from scratch on Chairs + Things, fine-tuned on Sintel and then on Kitti.
+            """,
         },
     )
@@ -619,7 +627,7 @@ class Raft_Large_Weights(WeightsEnum):
 class Raft_Small_Weights(WeightsEnum):
     C_T_V1 = Weights(
-        # Chairs + Things, ported from original paper repo (raft-small.pth)
+        # Weights ported from https://github.com/princeton-vl/RAFT
         url="https://download.pytorch.org/models/raft_small_C_T_V1-ad48884c.pth",
         transforms=OpticalFlow,
         meta={
@@ -632,10 +640,10 @@ class Raft_Small_Weights(WeightsEnum):
                 "kitti_train_per_image_epe": 7.6557,
                 "kitti_train_fl_all": 25.2801,
             },
+            "_docs": """These weights were ported from the original paper. They are trained on Chairs + Things.""",
         },
     )
     C_T_V2 = Weights(
-        # Chairs + Things
         url="https://download.pytorch.org/models/raft_small_C_T_V2-01064c6d.pth",
         transforms=OpticalFlow,
         meta={
@@ -648,6 +656,7 @@ class Raft_Small_Weights(WeightsEnum):
                 "kitti_train_per_image_epe": 7.5978,
                 "kitti_train_fl_all": 25.2369,
             },
+            "_docs": """These weights were trained from scratch on Chairs + Things.""",
         },
     )
...
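The comments that previously encoded the training data of each RAFT checkpoint (Chairs + Things, Sintel/Kitti fine-tuning, ported vs. retrained) now live in "_docs", where they appear in the generated docs. A usage sketch (not part of this commit), assuming torchvision v0.13+:

from torchvision.models.optical_flow import Raft_Large_Weights, raft_large

# C_T_SKHT_V2: trained from scratch on Chairs + Things, fine-tuned on Sintel (C+T+S+K+H).
weights = Raft_Large_Weights.C_T_SKHT_V2
model = raft_large(weights=weights)
print(weights.meta["_docs"])  # training provenance of this checkpoint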
@@ -121,6 +121,10 @@ class GoogLeNet_QuantizedWeights(WeightsEnum):
                 "acc@1": 69.826,
                 "acc@5": 89.404,
             },
+            "_docs": """
+                These weights were produced by doing Post Training Quantization (eager mode) on top of the unquantized
+                weights listed below.
+            """,
         },
     )
     DEFAULT = IMAGENET1K_FBGEMM_V1
...
@@ -187,6 +187,10 @@ class Inception_V3_QuantizedWeights(WeightsEnum):
                 "acc@1": 77.176,
                 "acc@5": 93.354,
             },
+            "_docs": """
+                These weights were produced by doing Post Training Quantization (eager mode) on top of the unquantized
+                weights listed below.
+            """,
         },
     )
     DEFAULT = IMAGENET1K_FBGEMM_V1
...
@@ -79,6 +79,10 @@ class MobileNet_V2_QuantizedWeights(WeightsEnum):
                 "acc@1": 71.658,
                 "acc@5": 90.150,
             },
+            "_docs": """
+                These weights were produced by doing Quantization Aware Training (eager mode) on top of the unquantized
+                weights listed below.
+            """,
         },
     )
     DEFAULT = IMAGENET1K_QNNPACK_V1
...
@@ -173,6 +173,10 @@ class MobileNet_V3_Large_QuantizedWeights(WeightsEnum):
                 "acc@1": 73.004,
                 "acc@5": 90.858,
             },
+            "_docs": """
+                These weights were produced by doing Quantization Aware Training (eager mode) on top of the unquantized
+                weights listed below.
+            """,
         },
     )
     DEFAULT = IMAGENET1K_QNNPACK_V1
...
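For the quantized classification models, the new "_docs" entries spell out whether a checkpoint comes from Post Training Quantization or Quantization Aware Training on top of the corresponding float weights. A usage sketch (not part of this commit), assuming torchvision v0.13+:

from torchvision.models.quantization import MobileNet_V3_Large_QuantizedWeights, mobilenet_v3_large

weights = MobileNet_V3_Large_QuantizedWeights.IMAGENET1K_QNNPACK_V1
model = mobilenet_v3_large(weights=weights, quantize=True)
print(weights.meta["_docs"])        # QAT (eager mode) on top of the unquantized weights
print(weights.meta["unquantized"])  # the float weights the checkpoint was derived from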
@@ -154,6 +154,10 @@ _COMMON_META = {
     "categories": _IMAGENET_CATEGORIES,
     "backend": "fbgemm",
     "recipe": "https://github.com/pytorch/vision/tree/main/references/classification#post-training-quantized-models",
+    "_docs": """
+        These weights were produced by doing Post Training Quantization (eager mode) on top of the unquantized
+        weights listed below.
+    """,
 }
...
@@ -118,6 +118,10 @@ _COMMON_META = {
     "categories": _IMAGENET_CATEGORIES,
     "backend": "fbgemm",
     "recipe": "https://github.com/pytorch/vision/tree/main/references/classification#post-training-quantized-models",
+    "_docs": """
+        These weights were produced by doing Post Training Quantization (eager mode) on top of the unquantized
+        weights listed below.
+    """,
 }
...
@@ -131,6 +131,10 @@ def _deeplabv3_resnet(
 _COMMON_META = {
     "categories": _VOC_CATEGORIES,
     "min_size": (1, 1),
+    "_docs": """
+        These weights were trained on a subset of COCO, using only the 20 categories that are present in the Pascal VOC
+        dataset.
+    """,
 }
...
@@ -50,6 +50,10 @@ class FCNHead(nn.Sequential):
 _COMMON_META = {
     "categories": _VOC_CATEGORIES,
     "min_size": (1, 1),
+    "_docs": """
+        These weights were trained on a subset of COCO, using only the 20 categories that are present in the Pascal VOC
+        dataset.
+    """,
 }
...
@@ -106,6 +106,10 @@ class LRASPP_MobileNet_V3_Large_Weights(WeightsEnum):
                 "miou": 57.9,
                 "pixel_acc": 91.2,
             },
+            "_docs": """
+                These weights were trained on a subset of COCO, using only the 20 categories that are present in the
+                Pascal VOC dataset.
+            """,
         },
     )
     DEFAULT = COCO_WITH_VOC_LABELS_V1
...
@@ -312,6 +312,7 @@ _COMMON_META = {
     "min_size": (1, 1),
     "categories": _KINETICS400_CATEGORIES,
     "recipe": "https://github.com/pytorch/vision/tree/main/references/video_classification",
+    "_docs": """These weights reproduce closely the accuracy of the paper for 16-frame clip inputs.""",
 }
...