Unverified Commit 11bd2eaa authored by Vasilis Vryniotis, committed by GitHub

Port Multi-weight support from prototype to main (#5618)



* Moving basefiles outside of prototype and porting Alexnet, ConvNext, Densenet and EfficientNet.

* Porting googlenet

* Porting inception

* Porting mnasnet

* Porting mobilenetv2

* Porting mobilenetv3

* Porting regnet

* Porting resnet

* Porting shufflenetv2

* Porting squeezenet

* Porting vgg

* Porting vit

* Fix docstrings

* Fixing imports

* Adding missing import

* Fix mobilenet imports

* Fix tests

* Fix prototype tests

* Exclude get_weight from models on test

* Fix init files

* Porting googlenet

* Porting inception

* porting mobilenetv2

* porting mobilenetv3

* porting resnet

* porting shufflenetv2

* Fix test and linter

* Fixing docs.

* Porting Detection models (#5617)

* fix inits

* fix docs

* Port faster_rcnn

* Port fcos

* Port keypoint_rcnn

* Port mask_rcnn

* Port retinanet

* Port ssd

* Port ssdlite

* Fix linter

* Fixing tests

* Fixing tests

* Fixing vgg test

* Porting Optical Flow, Segmentation, Video models (#5619)

* Porting raft

* Porting video resnet

* Porting deeplabv3

* Porting fcn and lraspp

* Fixing the tests and linter

* Porting docs, examples, tutorials and galleries (#5620)

* Fix examples, tutorials and gallery

* Update gallery/plot_optical_flow.py
Co-authored-by: Nicolas Hug <contact@nicolas-hug.com>

* Fix import

* Revert hardcoded normalization

* fix uncommitted changes

* Fix bug

* Fix more bugs

* Making resize optional for segmentation

* Fixing preset

* Fix mypy

* Fixing documentation strings

* Fix flake8

* minor refactoring
Co-authored-by: Nicolas Hug <contact@nicolas-hug.com>

* Resolve conflict

* Porting model tests (#5622)

* Porting tests

* Remove unnecessary variable

* Fix linter

* Move prototype to extended tests

* Fix download models job

* Update CI on Multiweight branch to use the new weight download approach (#5628)

* port Pad to prototype transforms (#5621)

* port Pad to prototype transforms

* use literal

* Bump up LibTorchvision version number for Podspec to release Cocoapods (#5624)
Co-authored-by: Anton Thomma <anton@pri.co.nz>
Co-authored-by: Vasilis Vryniotis <datumbox@users.noreply.github.com>

* pre-download model weights in CI docs build (#5625)

* pre-download model weights in CI docs build

* move changes into template

* change docs image

* Regenerated config.yml
Co-authored-by: Philip Meier <github.pmeier@posteo.de>
Co-authored-by: Anton Thomma <11010310+thommaa@users.noreply.github.com>
Co-authored-by: Anton Thomma <anton@pri.co.nz>

* Porting reference scripts and updating presets (#5629)

* Making _preset.py classes

* Remove support of targets on presets.

* Rewriting the video preset

* Adding tests to check that the bundled transforms are JIT scriptable

* Rename all presets from *Eval to *Inference

* Minor refactoring

* Remove --prototype and --pretrained from reference scripts

* remove pretrained_backbone refs

* Corrections and simplifications

* Fixing bug

* Fixing linter

* Fix flake8

* restore documentation example

* minor fixes

* fix optical flow missing param

* Fixing commands

* Adding weights_backbone support in detection and segmentation

* Updating the commands for InceptionV3

* Setting `weights_backbone` to its fully BC value (#5653)

* Replace default `weights_backbone=None` with its BC values.

* Fixing tests

* Fix linter

* Update docs.

* Update preprocessing on reference scripts.

* Change qat/ptq to their full values.

* Refactoring preprocessing

* Fix video preset

* No initialization on VGG if pretrained

* Fix warning messages for backbone utils.

* Adding star to all preset constructors.

* Fix mypy.
Co-authored-by: Nicolas Hug <contact@nicolas-hug.com>
Co-authored-by: Philip Meier <github.pmeier@posteo.de>
Co-authored-by: Anton Thomma <11010310+thommaa@users.noreply.github.com>
Co-authored-by: Anton Thomma <anton@pri.co.nz>
parent 375e4ab2
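
For orientation before the file diffs: a minimal sketch of how the ported multi-weight API is meant to be used, based on the builders changed below (illustrative only; ResNet-50 stands in for any of the ported classification models):

import torch
from torchvision.models import resnet50, ResNet50_Weights

# Weights are now enum members that bundle the checkpoint URL, the inference
# transforms and metadata (categories, accuracy metrics, training recipe).
weights = ResNet50_Weights.IMAGENET1K_V1
model = resnet50(weights=weights)
model.eval()

# Preprocessing ships with the weights instead of being hardcoded by callers.
preprocess = weights.transforms()
batch = preprocess(torch.rand(3, 224, 224)).unsqueeze(0)
scores = model(batch).softmax(dim=1)
print(weights.meta["categories"][scores.argmax().item()])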
import re
from collections import OrderedDict
from typing import Any, List, Tuple
from functools import partial
from typing import Any, List, Optional, Tuple
import torch
import torch.nn as nn
@@ -8,18 +9,24 @@ import torch.nn.functional as F
import torch.utils.checkpoint as cp
from torch import Tensor
from .._internally_replaced_utils import load_state_dict_from_url
from ..transforms._presets import ImageClassification, InterpolationMode
from ..utils import _log_api_usage_once
from ._api import WeightsEnum, Weights
from ._meta import _IMAGENET_CATEGORIES
from ._utils import handle_legacy_interface, _ovewrite_named_param
__all__ = ["DenseNet", "densenet121", "densenet169", "densenet201", "densenet161"]
model_urls = {
"densenet121": "https://download.pytorch.org/models/densenet121-a639ec97.pth",
"densenet169": "https://download.pytorch.org/models/densenet169-b2777c0a.pth",
"densenet201": "https://download.pytorch.org/models/densenet201-c1103571.pth",
"densenet161": "https://download.pytorch.org/models/densenet161-8d451a50.pth",
}
__all__ = [
"DenseNet",
"DenseNet121_Weights",
"DenseNet161_Weights",
"DenseNet169_Weights",
"DenseNet201_Weights",
"densenet121",
"densenet161",
"densenet169",
"densenet201",
]
class _DenseLayer(nn.Module):
@@ -220,7 +227,7 @@ class DenseNet(nn.Module):
return out
def _load_state_dict(model: nn.Module, model_url: str, progress: bool) -> None:
def _load_state_dict(model: nn.Module, weights: WeightsEnum, progress: bool) -> None:
# '.'s are no longer allowed in module names, but previous _DenseLayer
# has keys 'norm.1', 'relu.1', 'conv.1', 'norm.2', 'relu.2', 'conv.2'.
# They are also in the checkpoints in model_urls. This pattern is used
@@ -229,7 +236,7 @@ def _load_state_dict(model: nn.Module, model_url: str, progress: bool) -> None:
r"^(.*denselayer\d+\.(?:norm|relu|conv))\.((?:[12])\.(?:weight|bias|running_mean|running_var))$"
)
state_dict = load_state_dict_from_url(model_url, progress=progress)
state_dict = weights.get_state_dict(progress=progress)
for key in list(state_dict.keys()):
res = pattern.match(key)
if res:
@@ -240,71 +247,155 @@
def _densenet(
arch: str,
growth_rate: int,
block_config: Tuple[int, int, int, int],
num_init_features: int,
pretrained: bool,
weights: Optional[WeightsEnum],
progress: bool,
**kwargs: Any,
) -> DenseNet:
if weights is not None:
_ovewrite_named_param(kwargs, "num_classes", len(weights.meta["categories"]))
model = DenseNet(growth_rate, block_config, num_init_features, **kwargs)
if pretrained:
_load_state_dict(model, model_urls[arch], progress)
if weights is not None:
_load_state_dict(model=model, weights=weights, progress=progress)
return model
def densenet121(pretrained: bool = False, progress: bool = True, **kwargs: Any) -> DenseNet:
_COMMON_META = {
"task": "image_classification",
"architecture": "DenseNet",
"publication_year": 2016,
"size": (224, 224),
"min_size": (29, 29),
"categories": _IMAGENET_CATEGORIES,
"interpolation": InterpolationMode.BILINEAR,
"recipe": "https://github.com/pytorch/vision/pull/116",
}
class DenseNet121_Weights(WeightsEnum):
IMAGENET1K_V1 = Weights(
url="https://download.pytorch.org/models/densenet121-a639ec97.pth",
transforms=partial(ImageClassification, crop_size=224),
meta={
**_COMMON_META,
"num_params": 7978856,
"acc@1": 74.434,
"acc@5": 91.972,
},
)
DEFAULT = IMAGENET1K_V1
class DenseNet161_Weights(WeightsEnum):
IMAGENET1K_V1 = Weights(
url="https://download.pytorch.org/models/densenet161-8d451a50.pth",
transforms=partial(ImageClassification, crop_size=224),
meta={
**_COMMON_META,
"num_params": 28681000,
"acc@1": 77.138,
"acc@5": 93.560,
},
)
DEFAULT = IMAGENET1K_V1
class DenseNet169_Weights(WeightsEnum):
IMAGENET1K_V1 = Weights(
url="https://download.pytorch.org/models/densenet169-b2777c0a.pth",
transforms=partial(ImageClassification, crop_size=224),
meta={
**_COMMON_META,
"num_params": 14149480,
"acc@1": 75.600,
"acc@5": 92.806,
},
)
DEFAULT = IMAGENET1K_V1
class DenseNet201_Weights(WeightsEnum):
IMAGENET1K_V1 = Weights(
url="https://download.pytorch.org/models/densenet201-c1103571.pth",
transforms=partial(ImageClassification, crop_size=224),
meta={
**_COMMON_META,
"num_params": 20013928,
"acc@1": 76.896,
"acc@5": 93.370,
},
)
DEFAULT = IMAGENET1K_V1
@handle_legacy_interface(weights=("pretrained", DenseNet121_Weights.IMAGENET1K_V1))
def densenet121(*, weights: Optional[DenseNet121_Weights] = None, progress: bool = True, **kwargs: Any) -> DenseNet:
r"""Densenet-121 model from
`"Densely Connected Convolutional Networks" <https://arxiv.org/pdf/1608.06993.pdf>`_.
The required minimum input size of the model is 29x29.
Args:
pretrained (bool): If True, returns a model pre-trained on ImageNet
weights (DenseNet121_Weights, optional): The pretrained weights for the model
progress (bool): If True, displays a progress bar of the download to stderr
memory_efficient (bool) - If True, uses checkpointing. Much more memory efficient,
but slower. Default: *False*. See `"paper" <https://arxiv.org/pdf/1707.06990.pdf>`_.
"""
return _densenet("densenet121", 32, (6, 12, 24, 16), 64, pretrained, progress, **kwargs)
weights = DenseNet121_Weights.verify(weights)
return _densenet(32, (6, 12, 24, 16), 64, weights, progress, **kwargs)
def densenet161(pretrained: bool = False, progress: bool = True, **kwargs: Any) -> DenseNet:
@handle_legacy_interface(weights=("pretrained", DenseNet161_Weights.IMAGENET1K_V1))
def densenet161(*, weights: Optional[DenseNet161_Weights] = None, progress: bool = True, **kwargs: Any) -> DenseNet:
r"""Densenet-161 model from
`"Densely Connected Convolutional Networks" <https://arxiv.org/pdf/1608.06993.pdf>`_.
The required minimum input size of the model is 29x29.
Args:
pretrained (bool): If True, returns a model pre-trained on ImageNet
weights (DenseNet161_Weights, optional): The pretrained weights for the model
progress (bool): If True, displays a progress bar of the download to stderr
memory_efficient (bool) - If True, uses checkpointing. Much more memory efficient,
but slower. Default: *False*. See `"paper" <https://arxiv.org/pdf/1707.06990.pdf>`_.
"""
return _densenet("densenet161", 48, (6, 12, 36, 24), 96, pretrained, progress, **kwargs)
weights = DenseNet161_Weights.verify(weights)
return _densenet(48, (6, 12, 36, 24), 96, weights, progress, **kwargs)
def densenet169(pretrained: bool = False, progress: bool = True, **kwargs: Any) -> DenseNet:
@handle_legacy_interface(weights=("pretrained", DenseNet169_Weights.IMAGENET1K_V1))
def densenet169(*, weights: Optional[DenseNet169_Weights] = None, progress: bool = True, **kwargs: Any) -> DenseNet:
r"""Densenet-169 model from
`"Densely Connected Convolutional Networks" <https://arxiv.org/pdf/1608.06993.pdf>`_.
The required minimum input size of the model is 29x29.
Args:
pretrained (bool): If True, returns a model pre-trained on ImageNet
weights (DenseNet169_Weights, optional): The pretrained weights for the model
progress (bool): If True, displays a progress bar of the download to stderr
memory_efficient (bool) - If True, uses checkpointing. Much more memory efficient,
but slower. Default: *False*. See `"paper" <https://arxiv.org/pdf/1707.06990.pdf>`_.
"""
return _densenet("densenet169", 32, (6, 12, 32, 32), 64, pretrained, progress, **kwargs)
weights = DenseNet169_Weights.verify(weights)
return _densenet(32, (6, 12, 32, 32), 64, weights, progress, **kwargs)
def densenet201(pretrained: bool = False, progress: bool = True, **kwargs: Any) -> DenseNet:
@handle_legacy_interface(weights=("pretrained", DenseNet201_Weights.IMAGENET1K_V1))
def densenet201(*, weights: Optional[DenseNet201_Weights] = None, progress: bool = True, **kwargs: Any) -> DenseNet:
r"""Densenet-201 model from
`"Densely Connected Convolutional Networks" <https://arxiv.org/pdf/1608.06993.pdf>`_.
The required minimum input size of the model is 29x29.
Args:
pretrained (bool): If True, returns a model pre-trained on ImageNet
weights (DenseNet201_Weights, optional): The pretrained weights for the model
progress (bool): If True, displays a progress bar of the download to stderr
memory_efficient (bool) - If True, uses checkpointing. Much more memory efficient,
but slower. Default: *False*. See `"paper" <https://arxiv.org/pdf/1707.06990.pdf>`_.
"""
return _densenet("densenet201", 32, (6, 12, 48, 32), 64, pretrained, progress, **kwargs)
weights = DenseNet201_Weights.verify(weights)
return _densenet(32, (6, 12, 48, 32), 64, weights, progress, **kwargs)
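
That completes the densenet.py diff. As a sketch of the backward-compatibility layer above: both calling conventions resolve to the same checkpoint, with handle_legacy_interface translating the deprecated flag (names as introduced in this diff):

import warnings
from torchvision.models import densenet121, DenseNet121_Weights

# New-style call: num_classes is overwritten from the checkpoint metadata
# (via _ovewrite_named_param), so a conflicting value raises early.
model = densenet121(weights=DenseNet121_Weights.IMAGENET1K_V1)

# Old-style call still works: handle_legacy_interface maps pretrained=True to
# DenseNet121_Weights.IMAGENET1K_V1 and emits a deprecation warning.
with warnings.catch_warnings():
    warnings.simplefilter("ignore")
    legacy = densenet121(pretrained=True)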
from .faster_rcnn import *
from .mask_rcnn import *
from .fcos import *
from .keypoint_rcnn import *
from .mask_rcnn import *
from .retinanet import *
from .ssd import *
from .ssdlite import *
from .fcos import *
@@ -6,7 +6,8 @@ from torchvision.ops import misc as misc_nn_ops
from torchvision.ops.feature_pyramid_network import ExtraFPNBlock, FeaturePyramidNetwork, LastLevelMaxPool
from .. import mobilenet, resnet
from .._utils import IntermediateLayerGetter
from .._api import WeightsEnum, get_enum_from_fn
from .._utils import IntermediateLayerGetter, handle_legacy_interface
class BackboneWithFPN(nn.Module):
@@ -55,9 +56,16 @@ class BackboneWithFPN(nn.Module):
return x
@handle_legacy_interface(
weights=(
"pretrained",
lambda kwargs: get_enum_from_fn(resnet.__dict__[kwargs["backbone_name"]]).from_str("IMAGENET1K_V1"),
),
)
def resnet_fpn_backbone(
*,
backbone_name: str,
pretrained: bool,
weights: Optional[WeightsEnum],
norm_layer: Callable[..., nn.Module] = misc_nn_ops.FrozenBatchNorm2d,
trainable_layers: int = 3,
returned_layers: Optional[List[int]] = None,
@@ -69,7 +77,7 @@ def resnet_fpn_backbone(
Examples::
>>> from torchvision.models.detection.backbone_utils import resnet_fpn_backbone
>>> backbone = resnet_fpn_backbone('resnet50', pretrained=True, trainable_layers=3)
>>> backbone = resnet_fpn_backbone('resnet50', weights=ResNet50_Weights.DEFAULT, trainable_layers=3)
>>> # get some dummy image
>>> x = torch.rand(1,3,64,64)
>>> # compute the output
@@ -85,10 +93,10 @@
Args:
backbone_name (string): resnet architecture. Possible values are 'resnet18', 'resnet34', 'resnet50',
'resnet101', 'resnet152', 'resnext50_32x4d', 'resnext101_32x8d', 'wide_resnet50_2', 'wide_resnet101_2'
pretrained (bool): If True, returns a model with backbone pre-trained on Imagenet
weights (WeightsEnum, optional): The pretrained weights for the model
norm_layer (callable): it is recommended to use the default value. For details visit:
(https://github.com/facebookresearch/maskrcnn-benchmark/issues/267)
trainable_layers (int): number of trainable (not frozen) resnet layers starting from final block.
trainable_layers (int): number of trainable (not frozen) layers starting from final block.
Valid values are between 0 and 5, with 5 meaning all backbone layers are trainable.
returned_layers (list of int): The layers of the network to return. Each entry must be in ``[1, 4]``.
By default all layers are returned.
@@ -98,7 +106,7 @@
a new list of feature maps and their corresponding names. By
default a ``LastLevelMaxPool`` is used.
"""
backbone = resnet.__dict__[backbone_name](pretrained=pretrained, norm_layer=norm_layer)
backbone = resnet.__dict__[backbone_name](weights=weights, norm_layer=norm_layer)
return _resnet_fpn_extractor(backbone, trainable_layers, returned_layers, extra_blocks)
@@ -135,13 +143,13 @@ def _resnet_fpn_extractor(
def _validate_trainable_layers(
pretrained: bool,
is_trained: bool,
trainable_backbone_layers: Optional[int],
max_value: int,
default_value: int,
) -> int:
# don't freeze any layers if pretrained model or backbone is not used
if not pretrained:
if not is_trained:
if trainable_backbone_layers is not None:
warnings.warn(
"Changing trainable_backbone_layers has not effect if "
@@ -160,16 +168,23 @@
return trainable_backbone_layers
@handle_legacy_interface(
weights=(
"pretrained",
lambda kwargs: get_enum_from_fn(mobilenet.__dict__[kwargs["backbone_name"]]).from_str("IMAGENET1K_V1"),
),
)
def mobilenet_backbone(
*,
backbone_name: str,
pretrained: bool,
weights: Optional[WeightsEnum],
fpn: bool,
norm_layer: Callable[..., nn.Module] = misc_nn_ops.FrozenBatchNorm2d,
trainable_layers: int = 2,
returned_layers: Optional[List[int]] = None,
extra_blocks: Optional[ExtraFPNBlock] = None,
) -> nn.Module:
backbone = mobilenet.__dict__[backbone_name](pretrained=pretrained, norm_layer=norm_layer)
backbone = mobilenet.__dict__[backbone_name](weights=weights, norm_layer=norm_layer)
return _mobilenet_extractor(backbone, fpn, trainable_layers, returned_layers, extra_blocks)
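
A short usage sketch for the updated backbone_utils.py above; the builders are now keyword-only and weights replaces the pretrained flag:

import torch
from torchvision.models import ResNet50_Weights
from torchvision.models.detection.backbone_utils import resnet_fpn_backbone

backbone = resnet_fpn_backbone(
    backbone_name="resnet50",
    weights=ResNet50_Weights.IMAGENET1K_V1,
    trainable_layers=3,
)
x = torch.rand(1, 3, 64, 64)
# Returns an OrderedDict of FPN feature maps ('0'-'3' plus 'pool' by default).
features = backbone(x)
print([(name, feature.shape) for name, feature in features.items()])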
from typing import Any, Optional, Union
import torch.nn.functional as F
from torch import nn
from torchvision.ops import MultiScaleRoIAlign
from ..._internally_replaced_utils import load_state_dict_from_url
from ...ops import misc as misc_nn_ops
from ..mobilenetv3 import mobilenet_v3_large
from ..resnet import resnet50
from ...transforms._presets import ObjectDetection, InterpolationMode
from .._api import WeightsEnum, Weights
from .._meta import _COCO_CATEGORIES
from .._utils import handle_legacy_interface, _ovewrite_value_param
from ..mobilenetv3 import MobileNet_V3_Large_Weights, mobilenet_v3_large
from ..resnet import ResNet50_Weights, resnet50
from ._utils import overwrite_eps
from .anchor_utils import AnchorGenerator
from .backbone_utils import _resnet_fpn_extractor, _validate_trainable_layers, _mobilenet_extractor
@@ -17,9 +22,12 @@ from .transform import GeneralizedRCNNTransform
__all__ = [
"FasterRCNN",
"FasterRCNN_ResNet50_FPN_Weights",
"FasterRCNN_MobileNet_V3_Large_FPN_Weights",
"FasterRCNN_MobileNet_V3_Large_320_FPN_Weights",
"fasterrcnn_resnet50_fpn",
"fasterrcnn_mobilenet_v3_large_320_fpn",
"fasterrcnn_mobilenet_v3_large_fpn",
"fasterrcnn_mobilenet_v3_large_320_fpn",
]
@@ -109,7 +117,7 @@ class FasterRCNN(GeneralizedRCNN):
>>> from torchvision.models.detection.rpn import AnchorGenerator
>>> # load a pre-trained model for classification and return
>>> # only the features
>>> backbone = torchvision.models.mobilenet_v2(pretrained=True).features
>>> backbone = torchvision.models.mobilenet_v2(weights=MobileNet_V2_Weights.DEFAULT).features
>>> # FasterRCNN needs to know the number of
>>> # output channels in a backbone. For mobilenet_v2, it's 1280
>>> # so we need to add it here
@@ -316,16 +324,70 @@ class FastRCNNPredictor(nn.Module):
return scores, bbox_deltas
model_urls = {
"fasterrcnn_resnet50_fpn_coco": "https://download.pytorch.org/models/fasterrcnn_resnet50_fpn_coco-258fb6c6.pth",
"fasterrcnn_mobilenet_v3_large_320_fpn_coco": "https://download.pytorch.org/models/fasterrcnn_mobilenet_v3_large_320_fpn-907ea3f9.pth",
"fasterrcnn_mobilenet_v3_large_fpn_coco": "https://download.pytorch.org/models/fasterrcnn_mobilenet_v3_large_fpn-fb6a3cc7.pth",
_COMMON_META = {
"task": "image_object_detection",
"architecture": "FasterRCNN",
"publication_year": 2015,
"categories": _COCO_CATEGORIES,
"interpolation": InterpolationMode.BILINEAR,
}
class FasterRCNN_ResNet50_FPN_Weights(WeightsEnum):
COCO_V1 = Weights(
url="https://download.pytorch.org/models/fasterrcnn_resnet50_fpn_coco-258fb6c6.pth",
transforms=ObjectDetection,
meta={
**_COMMON_META,
"num_params": 41755286,
"recipe": "https://github.com/pytorch/vision/tree/main/references/detection#faster-r-cnn-resnet-50-fpn",
"map": 37.0,
},
)
DEFAULT = COCO_V1
class FasterRCNN_MobileNet_V3_Large_FPN_Weights(WeightsEnum):
COCO_V1 = Weights(
url="https://download.pytorch.org/models/fasterrcnn_mobilenet_v3_large_fpn-fb6a3cc7.pth",
transforms=ObjectDetection,
meta={
**_COMMON_META,
"num_params": 19386354,
"recipe": "https://github.com/pytorch/vision/tree/main/references/detection#faster-r-cnn-mobilenetv3-large-fpn",
"map": 32.8,
},
)
DEFAULT = COCO_V1
class FasterRCNN_MobileNet_V3_Large_320_FPN_Weights(WeightsEnum):
COCO_V1 = Weights(
url="https://download.pytorch.org/models/fasterrcnn_mobilenet_v3_large_320_fpn-907ea3f9.pth",
transforms=ObjectDetection,
meta={
**_COMMON_META,
"num_params": 19386354,
"recipe": "https://github.com/pytorch/vision/tree/main/references/detection#faster-r-cnn-mobilenetv3-large-320-fpn",
"map": 22.8,
},
)
DEFAULT = COCO_V1
@handle_legacy_interface(
weights=("pretrained", FasterRCNN_ResNet50_FPN_Weights.COCO_V1),
weights_backbone=("pretrained_backbone", ResNet50_Weights.IMAGENET1K_V1),
)
def fasterrcnn_resnet50_fpn(
pretrained=False, progress=True, num_classes=91, pretrained_backbone=True, trainable_backbone_layers=None, **kwargs
):
*,
weights: Optional[FasterRCNN_ResNet50_FPN_Weights] = None,
progress: bool = True,
num_classes: Optional[int] = None,
weights_backbone: Optional[ResNet50_Weights] = ResNet50_Weights.IMAGENET1K_V1,
trainable_backbone_layers: Optional[int] = None,
**kwargs: Any,
) -> FasterRCNN:
"""
Constructs a Faster R-CNN model with a ResNet-50-FPN backbone.
@@ -362,7 +424,7 @@ def fasterrcnn_resnet50_fpn(
Example::
>>> model = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained=True)
>>> model = torchvision.models.detection.fasterrcnn_resnet50_fpn(weights=FasterRCNN_ResNet50_FPN_Weights.DEFAULT)
>>> # For training
>>> images, boxes = torch.rand(4, 3, 600, 1200), torch.rand(4, 11, 4)
>>> boxes[:, :, 2:4] = boxes[:, :, 0:2] + boxes[:, :, 2:4]
@@ -384,51 +446,60 @@
>>> torch.onnx.export(model, x, "faster_rcnn.onnx", opset_version = 11)
Args:
pretrained (bool): If True, returns a model pre-trained on COCO train2017
weights (FasterRCNN_ResNet50_FPN_Weights, optional): The pretrained weights for the model
progress (bool): If True, displays a progress bar of the download to stderr
num_classes (int): number of output classes of the model (including the background)
pretrained_backbone (bool): If True, returns a model with backbone pre-trained on Imagenet
trainable_backbone_layers (int): number of trainable (not frozen) resnet layers starting from final block.
num_classes (int, optional): number of output classes of the model (including the background)
weights_backbone (ResNet50_Weights, optional): The pretrained weights for the backbone
trainable_backbone_layers (int, optional): number of trainable (not frozen) layers starting from final block.
Valid values are between 0 and 5, with 5 meaning all backbone layers are trainable. If ``None`` is
passed (the default) this value is set to 3.
"""
is_trained = pretrained or pretrained_backbone
weights = FasterRCNN_ResNet50_FPN_Weights.verify(weights)
weights_backbone = ResNet50_Weights.verify(weights_backbone)
if weights is not None:
weights_backbone = None
num_classes = _ovewrite_value_param(num_classes, len(weights.meta["categories"]))
elif num_classes is None:
num_classes = 91
is_trained = weights is not None or weights_backbone is not None
trainable_backbone_layers = _validate_trainable_layers(is_trained, trainable_backbone_layers, 5, 3)
norm_layer = misc_nn_ops.FrozenBatchNorm2d if is_trained else nn.BatchNorm2d
if pretrained:
# no need to download the backbone if pretrained is set
pretrained_backbone = False
backbone = resnet50(pretrained=pretrained_backbone, progress=progress, norm_layer=norm_layer)
backbone = resnet50(weights=weights_backbone, progress=progress, norm_layer=norm_layer)
backbone = _resnet_fpn_extractor(backbone, trainable_backbone_layers)
model = FasterRCNN(backbone, num_classes, **kwargs)
if pretrained:
state_dict = load_state_dict_from_url(model_urls["fasterrcnn_resnet50_fpn_coco"], progress=progress)
model.load_state_dict(state_dict)
overwrite_eps(model, 0.0)
model = FasterRCNN(backbone, num_classes=num_classes, **kwargs)
if weights is not None:
model.load_state_dict(weights.get_state_dict(progress=progress))
if weights == FasterRCNN_ResNet50_FPN_Weights.COCO_V1:
overwrite_eps(model, 0.0)
return model
def _fasterrcnn_mobilenet_v3_large_fpn(
weights_name,
pretrained=False,
progress=True,
num_classes=91,
pretrained_backbone=True,
trainable_backbone_layers=None,
**kwargs,
):
is_trained = pretrained or pretrained_backbone
*,
weights: Optional[Union[FasterRCNN_MobileNet_V3_Large_FPN_Weights, FasterRCNN_MobileNet_V3_Large_320_FPN_Weights]],
progress: bool,
num_classes: Optional[int],
weights_backbone: Optional[MobileNet_V3_Large_Weights],
trainable_backbone_layers: Optional[int],
**kwargs: Any,
) -> FasterRCNN:
if weights is not None:
weights_backbone = None
num_classes = _ovewrite_value_param(num_classes, len(weights.meta["categories"]))
elif num_classes is None:
num_classes = 91
is_trained = weights is not None or weights_backbone is not None
trainable_backbone_layers = _validate_trainable_layers(is_trained, trainable_backbone_layers, 6, 3)
norm_layer = misc_nn_ops.FrozenBatchNorm2d if is_trained else nn.BatchNorm2d
if pretrained:
pretrained_backbone = False
backbone = mobilenet_v3_large(pretrained=pretrained_backbone, progress=progress, norm_layer=norm_layer)
backbone = mobilenet_v3_large(weights=weights_backbone, progress=progress, norm_layer=norm_layer)
backbone = _mobilenet_extractor(backbone, True, trainable_backbone_layers)
anchor_sizes = (
(
32,
@@ -439,21 +510,29 @@ def _fasterrcnn_mobilenet_v3_large_fpn(
),
) * 3
aspect_ratios = ((0.5, 1.0, 2.0),) * len(anchor_sizes)
model = FasterRCNN(
backbone, num_classes, rpn_anchor_generator=AnchorGenerator(anchor_sizes, aspect_ratios), **kwargs
)
if pretrained:
if model_urls.get(weights_name, None) is None:
raise ValueError(f"No checkpoint is available for model {weights_name}")
state_dict = load_state_dict_from_url(model_urls[weights_name], progress=progress)
model.load_state_dict(state_dict)
if weights is not None:
model.load_state_dict(weights.get_state_dict(progress=progress))
return model
@handle_legacy_interface(
weights=("pretrained", FasterRCNN_MobileNet_V3_Large_320_FPN_Weights.COCO_V1),
weights_backbone=("pretrained_backbone", MobileNet_V3_Large_Weights.IMAGENET1K_V1),
)
def fasterrcnn_mobilenet_v3_large_320_fpn(
pretrained=False, progress=True, num_classes=91, pretrained_backbone=True, trainable_backbone_layers=None, **kwargs
):
*,
weights: Optional[FasterRCNN_MobileNet_V3_Large_320_FPN_Weights] = None,
progress: bool = True,
num_classes: Optional[int] = None,
weights_backbone: Optional[MobileNet_V3_Large_Weights] = MobileNet_V3_Large_Weights.IMAGENET1K_V1,
trainable_backbone_layers: Optional[int] = None,
**kwargs: Any,
) -> FasterRCNN:
"""
Constructs a low resolution Faster R-CNN model with a MobileNetV3-Large FPN backbone tuned for mobile use-cases.
It works similarly to Faster R-CNN with ResNet-50 FPN backbone. See
@@ -462,21 +541,23 @@ def fasterrcnn_mobilenet_v3_large_320_fpn(
Example::
>>> model = torchvision.models.detection.fasterrcnn_mobilenet_v3_large_320_fpn(pretrained=True)
>>> model = torchvision.models.detection.fasterrcnn_mobilenet_v3_large_320_fpn(weights=FasterRCNN_MobileNet_V3_Large_320_FPN_Weights.DEFAULT)
>>> model.eval()
>>> x = [torch.rand(3, 300, 400), torch.rand(3, 500, 400)]
>>> predictions = model(x)
Args:
pretrained (bool): If True, returns a model pre-trained on COCO train2017
weights (FasterRCNN_MobileNet_V3_Large_320_FPN_Weights, optional): The pretrained weights for the model
progress (bool): If True, displays a progress bar of the download to stderr
num_classes (int): number of output classes of the model (including the background)
pretrained_backbone (bool): If True, returns a model with backbone pre-trained on Imagenet
trainable_backbone_layers (int): number of trainable (not frozen) resnet layers starting from final block.
num_classes (int, optional): number of output classes of the model (including the background)
weights_backbone (MobileNet_V3_Large_Weights, optional): The pretrained weights for the backbone
trainable_backbone_layers (int, optional): number of trainable (not frozen) layers starting from final block.
Valid values are between 0 and 6, with 6 meaning all backbone layers are trainable. If ``None`` is
passed (the default) this value is set to 3.
"""
weights_name = "fasterrcnn_mobilenet_v3_large_320_fpn_coco"
weights = FasterRCNN_MobileNet_V3_Large_320_FPN_Weights.verify(weights)
weights_backbone = MobileNet_V3_Large_Weights.verify(weights_backbone)
defaults = {
"min_size": 320,
"max_size": 640,
@@ -487,19 +568,28 @@ def fasterrcnn_mobilenet_v3_large_320_fpn(
kwargs = {**defaults, **kwargs}
return _fasterrcnn_mobilenet_v3_large_fpn(
weights_name,
pretrained=pretrained,
weights=weights,
progress=progress,
num_classes=num_classes,
pretrained_backbone=pretrained_backbone,
weights_backbone=weights_backbone,
trainable_backbone_layers=trainable_backbone_layers,
**kwargs,
)
@handle_legacy_interface(
weights=("pretrained", FasterRCNN_MobileNet_V3_Large_FPN_Weights.COCO_V1),
weights_backbone=("pretrained_backbone", MobileNet_V3_Large_Weights.IMAGENET1K_V1),
)
def fasterrcnn_mobilenet_v3_large_fpn(
pretrained=False, progress=True, num_classes=91, pretrained_backbone=True, trainable_backbone_layers=None, **kwargs
):
*,
weights: Optional[FasterRCNN_MobileNet_V3_Large_FPN_Weights] = None,
progress: bool = True,
num_classes: Optional[int] = None,
weights_backbone: Optional[MobileNet_V3_Large_Weights] = MobileNet_V3_Large_Weights.IMAGENET1K_V1,
trainable_backbone_layers: Optional[int] = None,
**kwargs: Any,
) -> FasterRCNN:
"""
Constructs a high resolution Faster R-CNN model with a MobileNetV3-Large FPN backbone.
It works similarly to Faster R-CNN with ResNet-50 FPN backbone. See
@@ -508,32 +598,33 @@ def fasterrcnn_mobilenet_v3_large_fpn(
Example::
>>> model = torchvision.models.detection.fasterrcnn_mobilenet_v3_large_fpn(pretrained=True)
>>> model = torchvision.models.detection.fasterrcnn_mobilenet_v3_large_fpn(weights=FasterRCNN_MobileNet_V3_Large_FPN_Weights.DEFAULT)
>>> model.eval()
>>> x = [torch.rand(3, 300, 400), torch.rand(3, 500, 400)]
>>> predictions = model(x)
Args:
pretrained (bool): If True, returns a model pre-trained on COCO train2017
weights (FasterRCNN_MobileNet_V3_Large_FPN_Weights, optional): The pretrained weights for the model
progress (bool): If True, displays a progress bar of the download to stderr
num_classes (int): number of output classes of the model (including the background)
pretrained_backbone (bool): If True, returns a model with backbone pre-trained on Imagenet
trainable_backbone_layers (int): number of trainable (not frozen) resnet layers starting from final block.
num_classes (int, optional): number of output classes of the model (including the background)
weights_backbone (MobileNet_V3_Large_Weights, optional): The pretrained weights for the backbone
trainable_backbone_layers (int, optional): number of trainable (not frozen) layers starting from final block.
Valid values are between 0 and 6, with 6 meaning all backbone layers are trainable. If ``None`` is
passed (the default) this value is set to 3.
"""
weights_name = "fasterrcnn_mobilenet_v3_large_fpn_coco"
weights = FasterRCNN_MobileNet_V3_Large_FPN_Weights.verify(weights)
weights_backbone = MobileNet_V3_Large_Weights.verify(weights_backbone)
defaults = {
"rpn_score_thresh": 0.05,
}
kwargs = {**defaults, **kwargs}
return _fasterrcnn_mobilenet_v3_large_fpn(
weights_name,
pretrained=pretrained,
weights=weights,
progress=progress,
num_classes=num_classes,
pretrained_backbone=pretrained_backbone,
weights_backbone=weights_backbone,
trainable_backbone_layers=trainable_backbone_layers,
**kwargs,
)
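
End-to-end inference against the new faster_rcnn.py builders above, as a sketch (random tensors stand in for real images):

import torch
from torchvision.models.detection import (
    FasterRCNN_ResNet50_FPN_Weights,
    fasterrcnn_resnet50_fpn,
)

weights = FasterRCNN_ResNet50_FPN_Weights.DEFAULT  # alias of COCO_V1
model = fasterrcnn_resnet50_fpn(weights=weights).eval()

# The ObjectDetection preset only converts inputs to rescaled float tensors;
# resizing and normalization remain inside GeneralizedRCNNTransform.
preprocess = weights.transforms()
images = [preprocess(torch.rand(3, 300, 400)), preprocess(torch.rand(3, 500, 400))]

with torch.no_grad():
    predictions = model(images)
names = [weights.meta["categories"][int(label)] for label in predictions[0]["labels"]]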
@@ -2,25 +2,32 @@ import math
import warnings
from collections import OrderedDict
from functools import partial
from typing import Callable, Dict, List, Tuple, Optional
from typing import Any, Callable, Dict, List, Tuple, Optional
import torch
from torch import nn, Tensor
from ..._internally_replaced_utils import load_state_dict_from_url
from ...ops import sigmoid_focal_loss, generalized_box_iou_loss
from ...ops import boxes as box_ops
from ...ops import misc as misc_nn_ops
from ...ops.feature_pyramid_network import LastLevelP6P7
from ...transforms._presets import ObjectDetection, InterpolationMode
from ...utils import _log_api_usage_once
from ..resnet import resnet50
from .._api import WeightsEnum, Weights
from .._meta import _COCO_CATEGORIES
from .._utils import handle_legacy_interface, _ovewrite_value_param
from ..resnet import ResNet50_Weights, resnet50
from . import _utils as det_utils
from .anchor_utils import AnchorGenerator
from .backbone_utils import _resnet_fpn_extractor, _validate_trainable_layers
from .transform import GeneralizedRCNNTransform
__all__ = ["FCOS", "fcos_resnet50_fpn"]
__all__ = [
"FCOS",
"FCOS_ResNet50_FPN_Weights",
"fcos_resnet50_fpn",
]
class FCOSHead(nn.Module):
@@ -318,7 +325,7 @@ class FCOS(nn.Module):
>>> from torchvision.models.detection.anchor_utils import AnchorGenerator
>>> # load a pre-trained model for classification and return
>>> # only the features
>>> backbone = torchvision.models.mobilenet_v2(pretrained=True).features
>>> backbone = torchvision.models.mobilenet_v2(weights=MobileNet_V2_Weights.DEFAULT).features
>>> # FCOS needs to know the number of
>>> # output channels in a backbone. For mobilenet_v2, it's 1280
>>> # so we need to add it here
@@ -636,19 +643,37 @@ class FCOS(nn.Module):
return self.eager_outputs(losses, detections)
model_urls = {
"fcos_resnet50_fpn_coco": "https://download.pytorch.org/models/fcos_resnet50_fpn_coco-99b0c9b7.pth",
}
class FCOS_ResNet50_FPN_Weights(WeightsEnum):
COCO_V1 = Weights(
url="https://download.pytorch.org/models/fcos_resnet50_fpn_coco-99b0c9b7.pth",
transforms=ObjectDetection,
meta={
"task": "image_object_detection",
"architecture": "FCOS",
"publication_year": 2019,
"num_params": 32269600,
"categories": _COCO_CATEGORIES,
"interpolation": InterpolationMode.BILINEAR,
"recipe": "https://github.com/pytorch/vision/tree/main/references/detection#fcos-resnet-50-fpn",
"map": 39.2,
},
)
DEFAULT = COCO_V1
@handle_legacy_interface(
weights=("pretrained", FCOS_ResNet50_FPN_Weights.COCO_V1),
weights_backbone=("pretrained_backbone", ResNet50_Weights.IMAGENET1K_V1),
)
def fcos_resnet50_fpn(
pretrained: bool = False,
*,
weights: Optional[FCOS_ResNet50_FPN_Weights] = None,
progress: bool = True,
num_classes: int = 91,
pretrained_backbone: bool = True,
num_classes: Optional[int] = None,
weights_backbone: Optional[ResNet50_Weights] = ResNet50_Weights.IMAGENET1K_V1,
trainable_backbone_layers: Optional[int] = None,
**kwargs,
):
**kwargs: Any,
) -> FCOS:
"""
Constructs a FCOS model with a ResNet-50-FPN backbone.
@@ -682,34 +707,40 @@ def fcos_resnet50_fpn(
Example:
>>> model = torchvision.models.detection.fcos_resnet50_fpn(pretrained=True)
>>> model = torchvision.models.detection.fcos_resnet50_fpn(weights=FCOS_ResNet50_FPN_Weights.DEFAULT)
>>> model.eval()
>>> x = [torch.rand(3, 300, 400), torch.rand(3, 500, 400)]
>>> predictions = model(x)
Args:
pretrained (bool): If True, returns a model pre-trained on COCO train2017
weights (FCOS_ResNet50_FPN_Weights, optional): The pretrained weights for the model
progress (bool): If True, displays a progress bar of the download to stderr
num_classes (int): number of output classes of the model (including the background)
pretrained_backbone (bool): If True, returns a model with backbone pre-trained on Imagenet
num_classes (int, optional): number of output classes of the model (including the background)
weights_backbone (ResNet50_Weights, optional): The pretrained weights for the backbone
trainable_backbone_layers (int, optional): number of trainable (not frozen) resnet layers starting
from final block. Valid values are between 0 and 5, with 5 meaning all backbone layers are
trainable. If ``None`` is passed (the default) this value is set to 3. Default: None
"""
is_trained = pretrained or pretrained_backbone
weights = FCOS_ResNet50_FPN_Weights.verify(weights)
weights_backbone = ResNet50_Weights.verify(weights_backbone)
if weights is not None:
weights_backbone = None
num_classes = _ovewrite_value_param(num_classes, len(weights.meta["categories"]))
elif num_classes is None:
num_classes = 91
is_trained = weights is not None or weights_backbone is not None
trainable_backbone_layers = _validate_trainable_layers(is_trained, trainable_backbone_layers, 5, 3)
norm_layer = misc_nn_ops.FrozenBatchNorm2d if is_trained else nn.BatchNorm2d
if pretrained:
# no need to download the backbone if pretrained is set
pretrained_backbone = False
backbone = resnet50(pretrained=pretrained_backbone, progress=progress, norm_layer=norm_layer)
backbone = resnet50(weights=weights_backbone, progress=progress, norm_layer=norm_layer)
backbone = _resnet_fpn_extractor(
backbone, trainable_backbone_layers, returned_layers=[2, 3, 4], extra_blocks=LastLevelP6P7(256, 256)
)
model = FCOS(backbone, num_classes, **kwargs)
if pretrained:
state_dict = load_state_dict_from_url(model_urls["fcos_resnet50_fpn_coco"], progress=progress)
model.load_state_dict(state_dict)
if weights is not None:
model.load_state_dict(weights.get_state_dict(progress=progress))
return model
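
The fcos.py builder above follows the pattern shared by all detection builders in this PR: when weights is set, weights_backbone is dropped and num_classes is derived from the checkpoint metadata; otherwise both remain configurable. A sketch:

from torchvision.models import ResNet50_Weights
from torchvision.models.detection import FCOS_ResNet50_FPN_Weights, fcos_resnet50_fpn

# weights set: backbone weights are ignored, num_classes becomes 91 (COCO).
model = fcos_resnet50_fpn(weights=FCOS_ResNet50_FPN_Weights.COCO_V1)

# weights unset: num_classes is free, and the backbone can still start
# from ImageNet weights.
custom = fcos_resnet50_fpn(num_classes=5, weights_backbone=ResNet50_Weights.IMAGENET1K_V1)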
from typing import Any, Optional
import torch
from torch import nn
from torchvision.ops import MultiScaleRoIAlign
from ..._internally_replaced_utils import load_state_dict_from_url
from ...ops import misc as misc_nn_ops
from ..resnet import resnet50
from ...transforms._presets import ObjectDetection, InterpolationMode
from .._api import WeightsEnum, Weights
from .._meta import _COCO_PERSON_CATEGORIES, _COCO_PERSON_KEYPOINT_NAMES
from .._utils import handle_legacy_interface, _ovewrite_value_param
from ..resnet import ResNet50_Weights, resnet50
from ._utils import overwrite_eps
from .backbone_utils import _resnet_fpn_extractor, _validate_trainable_layers
from .faster_rcnn import FasterRCNN
__all__ = ["KeypointRCNN", "keypointrcnn_resnet50_fpn"]
__all__ = [
"KeypointRCNN",
"KeypointRCNN_ResNet50_FPN_Weights",
"keypointrcnn_resnet50_fpn",
]
class KeypointRCNN(FasterRCNN):
@@ -110,7 +119,7 @@ class KeypointRCNN(FasterRCNN):
>>>
>>> # load a pre-trained model for classification and return
>>> # only the features
>>> backbone = torchvision.models.mobilenet_v2(pretrained=True).features
>>> backbone = torchvision.models.mobilenet_v2(weights=MobileNet_V2_Weights.DEFAULT).features
>>> # KeypointRCNN needs to know the number of
>>> # output channels in a backbone. For mobilenet_v2, it's 1280
>>> # so we need to add it here
@@ -296,22 +305,61 @@ class KeypointRCNNPredictor(nn.Module):
)
model_urls = {
# legacy model for BC reasons, see https://github.com/pytorch/vision/issues/1606
"keypointrcnn_resnet50_fpn_coco_legacy": "https://download.pytorch.org/models/keypointrcnn_resnet50_fpn_coco-9f466800.pth",
"keypointrcnn_resnet50_fpn_coco": "https://download.pytorch.org/models/keypointrcnn_resnet50_fpn_coco-fc266e95.pth",
_COMMON_META = {
"task": "image_object_detection",
"architecture": "KeypointRCNN",
"publication_year": 2017,
"categories": _COCO_PERSON_CATEGORIES,
"keypoint_names": _COCO_PERSON_KEYPOINT_NAMES,
"interpolation": InterpolationMode.BILINEAR,
}
class KeypointRCNN_ResNet50_FPN_Weights(WeightsEnum):
COCO_LEGACY = Weights(
url="https://download.pytorch.org/models/keypointrcnn_resnet50_fpn_coco-9f466800.pth",
transforms=ObjectDetection,
meta={
**_COMMON_META,
"num_params": 59137258,
"recipe": "https://github.com/pytorch/vision/issues/1606",
"map": 50.6,
"map_kp": 61.1,
},
)
COCO_V1 = Weights(
url="https://download.pytorch.org/models/keypointrcnn_resnet50_fpn_coco-fc266e95.pth",
transforms=ObjectDetection,
meta={
**_COMMON_META,
"num_params": 59137258,
"recipe": "https://github.com/pytorch/vision/tree/main/references/detection#keypoint-r-cnn",
"map": 54.6,
"map_kp": 65.0,
},
)
DEFAULT = COCO_V1
@handle_legacy_interface(
weights=(
"pretrained",
lambda kwargs: KeypointRCNN_ResNet50_FPN_Weights.COCO_LEGACY
if kwargs["pretrained"] == "legacy"
else KeypointRCNN_ResNet50_FPN_Weights.COCO_V1,
),
weights_backbone=("pretrained_backbone", ResNet50_Weights.IMAGENET1K_V1),
)
def keypointrcnn_resnet50_fpn(
pretrained=False,
progress=True,
num_classes=2,
num_keypoints=17,
pretrained_backbone=True,
trainable_backbone_layers=None,
**kwargs,
):
*,
weights: Optional[KeypointRCNN_ResNet50_FPN_Weights] = None,
progress: bool = True,
num_classes: Optional[int] = None,
num_keypoints: Optional[int] = None,
weights_backbone: Optional[ResNet50_Weights] = ResNet50_Weights.IMAGENET1K_V1,
trainable_backbone_layers: Optional[int] = None,
**kwargs: Any,
) -> KeypointRCNN:
"""
Constructs a Keypoint R-CNN model with a ResNet-50-FPN backbone.
@@ -350,7 +398,7 @@ def keypointrcnn_resnet50_fpn(
Example::
>>> model = torchvision.models.detection.keypointrcnn_resnet50_fpn(pretrained=True)
>>> model = torchvision.models.detection.keypointrcnn_resnet50_fpn(weights=KeypointRCNN_ResNet50_FPN_Weights.DEFAULT)
>>> model.eval()
>>> x = [torch.rand(3, 300, 400), torch.rand(3, 500, 400)]
>>> predictions = model(x)
@@ -359,31 +407,39 @@
>>> torch.onnx.export(model, x, "keypoint_rcnn.onnx", opset_version = 11)
Args:
pretrained (bool): If True, returns a model pre-trained on COCO train2017
weights (KeypointRCNN_ResNet50_FPN_Weights, optional): The pretrained weights for the model
progress (bool): If True, displays a progress bar of the download to stderr
num_classes (int): number of output classes of the model (including the background)
num_keypoints (int): number of keypoints, default 17
pretrained_backbone (bool): If True, returns a model with backbone pre-trained on Imagenet
trainable_backbone_layers (int): number of trainable (not frozen) resnet layers starting from final block.
num_classes (int, optional): number of output classes of the model (including the background)
num_keypoints (int, optional): number of keypoints
weights_backbone (ResNet50_Weights, optional): The pretrained weights for the backbone
trainable_backbone_layers (int, optional): number of trainable (not frozen) layers starting from final block.
Valid values are between 0 and 5, with 5 meaning all backbone layers are trainable. If ``None`` is
passed (the default) this value is set to 3.
"""
is_trained = pretrained or pretrained_backbone
weights = KeypointRCNN_ResNet50_FPN_Weights.verify(weights)
weights_backbone = ResNet50_Weights.verify(weights_backbone)
if weights is not None:
weights_backbone = None
num_classes = _ovewrite_value_param(num_classes, len(weights.meta["categories"]))
num_keypoints = _ovewrite_value_param(num_keypoints, len(weights.meta["keypoint_names"]))
else:
if num_classes is None:
num_classes = 2
if num_keypoints is None:
num_keypoints = 17
is_trained = weights is not None or weights_backbone is not None
trainable_backbone_layers = _validate_trainable_layers(is_trained, trainable_backbone_layers, 5, 3)
norm_layer = misc_nn_ops.FrozenBatchNorm2d if is_trained else nn.BatchNorm2d
if pretrained:
# no need to download the backbone if pretrained is set
pretrained_backbone = False
backbone = resnet50(pretrained=pretrained_backbone, progress=progress, norm_layer=norm_layer)
backbone = resnet50(weights=weights_backbone, progress=progress, norm_layer=norm_layer)
backbone = _resnet_fpn_extractor(backbone, trainable_backbone_layers)
model = KeypointRCNN(backbone, num_classes, num_keypoints=num_keypoints, **kwargs)
if pretrained:
key = "keypointrcnn_resnet50_fpn_coco"
if pretrained == "legacy":
key += "_legacy"
state_dict = load_state_dict_from_url(model_urls[key], progress=progress)
model.load_state_dict(state_dict)
overwrite_eps(model, 0.0)
if weights is not None:
model.load_state_dict(weights.get_state_dict(progress=progress))
if weights == KeypointRCNN_ResNet50_FPN_Weights.COCO_V1:
overwrite_eps(model, 0.0)
return model
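
keypoint_rcnn.py is the one builder with two published checkpoints, and the lambda passed to handle_legacy_interface above keeps the old pretrained="legacy" spelling working. A sketch of addressing both:

from torchvision.models.detection import (
    KeypointRCNN_ResNet50_FPN_Weights,
    keypointrcnn_resnet50_fpn,
)

# DEFAULT points at COCO_V1; COCO_LEGACY is kept for BC (see issue #1606).
model = keypointrcnn_resnet50_fpn(weights=KeypointRCNN_ResNet50_FPN_Weights.DEFAULT)
legacy = keypointrcnn_resnet50_fpn(weights=KeypointRCNN_ResNet50_FPN_Weights.COCO_LEGACY)

# Like num_classes, num_keypoints is overwritten from the checkpoint metadata
# (the 17 COCO person keypoints) whenever weights are given.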
from collections import OrderedDict
from typing import Any, Optional
from torch import nn
from torchvision.ops import MultiScaleRoIAlign
from ..._internally_replaced_utils import load_state_dict_from_url
from ...ops import misc as misc_nn_ops
from ..resnet import resnet50
from ...transforms._presets import ObjectDetection, InterpolationMode
from .._api import WeightsEnum, Weights
from .._meta import _COCO_CATEGORIES
from .._utils import handle_legacy_interface, _ovewrite_value_param
from ..resnet import ResNet50_Weights, resnet50
from ._utils import overwrite_eps
from .backbone_utils import _resnet_fpn_extractor, _validate_trainable_layers
from .faster_rcnn import FasterRCNN
__all__ = [
"MaskRCNN",
"MaskRCNN_ResNet50_FPN_Weights",
"maskrcnn_resnet50_fpn",
]
@@ -112,7 +118,7 @@ class MaskRCNN(FasterRCNN):
>>>
>>> # load a pre-trained model for classification and return
>>> # only the features
>>> backbone = torchvision.models.mobilenet_v2(pretrained=True).features
>>> backbone = torchvision.models.mobilenet_v2(weights=MobileNet_V2_Weights.DEFAULT).features
>>> # MaskRCNN needs to know the number of
>>> # output channels in a backbone. For mobilenet_v2, it's 1280
>>> # so we need to add it here
@@ -299,14 +305,38 @@ class MaskRCNNPredictor(nn.Sequential):
# nn.init.constant_(param, 0)
model_urls = {
"maskrcnn_resnet50_fpn_coco": "https://download.pytorch.org/models/maskrcnn_resnet50_fpn_coco-bf2d0c1e.pth",
}
class MaskRCNN_ResNet50_FPN_Weights(WeightsEnum):
COCO_V1 = Weights(
url="https://download.pytorch.org/models/maskrcnn_resnet50_fpn_coco-bf2d0c1e.pth",
transforms=ObjectDetection,
meta={
"task": "image_object_detection",
"architecture": "MaskRCNN",
"publication_year": 2017,
"num_params": 44401393,
"categories": _COCO_CATEGORIES,
"interpolation": InterpolationMode.BILINEAR,
"recipe": "https://github.com/pytorch/vision/tree/main/references/detection#mask-r-cnn",
"map": 37.9,
"map_mask": 34.6,
},
)
DEFAULT = COCO_V1
@handle_legacy_interface(
weights=("pretrained", MaskRCNN_ResNet50_FPN_Weights.COCO_V1),
weights_backbone=("pretrained_backbone", ResNet50_Weights.IMAGENET1K_V1),
)
def maskrcnn_resnet50_fpn(
pretrained=False, progress=True, num_classes=91, pretrained_backbone=True, trainable_backbone_layers=None, **kwargs
):
*,
weights: Optional[MaskRCNN_ResNet50_FPN_Weights] = None,
progress: bool = True,
num_classes: Optional[int] = None,
weights_backbone: Optional[ResNet50_Weights] = ResNet50_Weights.IMAGENET1K_V1,
trainable_backbone_layers: Optional[int] = None,
**kwargs: Any,
) -> MaskRCNN:
"""
Constructs a Mask R-CNN model with a ResNet-50-FPN backbone.
@@ -346,7 +376,7 @@ def maskrcnn_resnet50_fpn(
Example::
>>> model = torchvision.models.detection.maskrcnn_resnet50_fpn(pretrained=True)
>>> model = torchvision.models.detection.maskrcnn_resnet50_fpn(weights=MaskRCNN_ResNet50_FPN_Weights.DEFAULT)
>>> model.eval()
>>> x = [torch.rand(3, 300, 400), torch.rand(3, 500, 400)]
>>> predictions = model(x)
@@ -355,27 +385,34 @@
>>> torch.onnx.export(model, x, "mask_rcnn.onnx", opset_version = 11)
Args:
pretrained (bool): If True, returns a model pre-trained on COCO train2017
weights (MaskRCNN_ResNet50_FPN_Weights, optional): The pretrained weights for the model
progress (bool): If True, displays a progress bar of the download to stderr
num_classes (int): number of output classes of the model (including the background)
pretrained_backbone (bool): If True, returns a model with backbone pre-trained on Imagenet
trainable_backbone_layers (int): number of trainable (not frozen) resnet layers starting from final block.
num_classes (int, optional): number of output classes of the model (including the background)
weights_backbone (ResNet50_Weights, optional): The pretrained weights for the backbone
trainable_backbone_layers (int, optional): number of trainable (not frozen) layers starting from final block.
Valid values are between 0 and 5, with 5 meaning all backbone layers are trainable. If ``None`` is
passed (the default) this value is set to 3.
"""
is_trained = pretrained or pretrained_backbone
weights = MaskRCNN_ResNet50_FPN_Weights.verify(weights)
weights_backbone = ResNet50_Weights.verify(weights_backbone)
if weights is not None:
weights_backbone = None
num_classes = _ovewrite_value_param(num_classes, len(weights.meta["categories"]))
elif num_classes is None:
num_classes = 91
is_trained = weights is not None or weights_backbone is not None
trainable_backbone_layers = _validate_trainable_layers(is_trained, trainable_backbone_layers, 5, 3)
norm_layer = misc_nn_ops.FrozenBatchNorm2d if is_trained else nn.BatchNorm2d
if pretrained:
# no need to download the backbone if pretrained is set
pretrained_backbone = False
backbone = resnet50(pretrained=pretrained_backbone, progress=progress, norm_layer=norm_layer)
backbone = resnet50(weights=weights_backbone, progress=progress, norm_layer=norm_layer)
backbone = _resnet_fpn_extractor(backbone, trainable_backbone_layers)
model = MaskRCNN(backbone, num_classes, **kwargs)
if pretrained:
state_dict = load_state_dict_from_url(model_urls["maskrcnn_resnet50_fpn_coco"], progress=progress)
model.load_state_dict(state_dict)
overwrite_eps(model, 0.0)
model = MaskRCNN(backbone, num_classes=num_classes, **kwargs)
if weights is not None:
model.load_state_dict(weights.get_state_dict(progress=progress))
if weights == MaskRCNN_ResNet50_FPN_Weights.COCO_V1:
overwrite_eps(model, 0.0)
return model
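
Weights can also be resolved from their fully qualified string name, which is what the updated reference scripts rely on; this assumes get_weight is exposed from torchvision.models as the commit message above implies:

from torchvision.models import get_weight

weights = get_weight("MaskRCNN_ResNet50_FPN_Weights.COCO_V1")
print(weights.meta["map"], weights.meta["map_mask"])  # 37.9 box / 34.6 mask mAP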
import math
import warnings
from collections import OrderedDict
from typing import Dict, List, Tuple, Optional
from typing import Any, Dict, List, Tuple, Optional
import torch
from torch import nn, Tensor
from ..._internally_replaced_utils import load_state_dict_from_url
from ...ops import sigmoid_focal_loss
from ...ops import boxes as box_ops
from ...ops import misc as misc_nn_ops
from ...ops.feature_pyramid_network import LastLevelP6P7
from ...transforms._presets import ObjectDetection, InterpolationMode
from ...utils import _log_api_usage_once
from ..resnet import resnet50
from .._api import WeightsEnum, Weights
from .._meta import _COCO_CATEGORIES
from .._utils import handle_legacy_interface, _ovewrite_value_param
from ..resnet import ResNet50_Weights, resnet50
from . import _utils as det_utils
from ._utils import overwrite_eps
from .anchor_utils import AnchorGenerator
@@ -20,7 +23,11 @@ from .backbone_utils import _resnet_fpn_extractor, _validate_trainable_layers
from .transform import GeneralizedRCNNTransform
__all__ = ["RetinaNet", "retinanet_resnet50_fpn"]
__all__ = [
"RetinaNet",
"RetinaNet_ResNet50_FPN_Weights",
"retinanet_resnet50_fpn",
]
def _sum(x: List[Tensor]) -> Tensor:
@@ -286,7 +293,7 @@ class RetinaNet(nn.Module):
>>> from torchvision.models.detection.anchor_utils import AnchorGenerator
>>> # load a pre-trained model for classification and return
>>> # only the features
>>> backbone = torchvision.models.mobilenet_v2(pretrained=True).features
>>> backbone = torchvision.models.mobilenet_v2(weights=MobileNet_V2_Weights.DEFAULT).features
>>> # RetinaNet needs to know the number of
>>> # output channels in a backbone. For mobilenet_v2, it's 1280
>>> # so we need to add it here
@@ -578,14 +585,37 @@ class RetinaNet(nn.Module):
return self.eager_outputs(losses, detections)
model_urls = {
"retinanet_resnet50_fpn_coco": "https://download.pytorch.org/models/retinanet_resnet50_fpn_coco-eeacb38b.pth",
}
class RetinaNet_ResNet50_FPN_Weights(WeightsEnum):
COCO_V1 = Weights(
url="https://download.pytorch.org/models/retinanet_resnet50_fpn_coco-eeacb38b.pth",
transforms=ObjectDetection,
meta={
"task": "image_object_detection",
"architecture": "RetinaNet",
"publication_year": 2017,
"num_params": 34014999,
"categories": _COCO_CATEGORIES,
"interpolation": InterpolationMode.BILINEAR,
"recipe": "https://github.com/pytorch/vision/tree/main/references/detection#retinanet",
"map": 36.4,
},
)
DEFAULT = COCO_V1
@handle_legacy_interface(
weights=("pretrained", RetinaNet_ResNet50_FPN_Weights.COCO_V1),
weights_backbone=("pretrained_backbone", ResNet50_Weights.IMAGENET1K_V1),
)
def retinanet_resnet50_fpn(
pretrained=False, progress=True, num_classes=91, pretrained_backbone=True, trainable_backbone_layers=None, **kwargs
):
*,
weights: Optional[RetinaNet_ResNet50_FPN_Weights] = None,
progress: bool = True,
num_classes: Optional[int] = None,
weights_backbone: Optional[ResNet50_Weights] = ResNet50_Weights.IMAGENET1K_V1,
trainable_backbone_layers: Optional[int] = None,
**kwargs: Any,
) -> RetinaNet:
"""
Constructs a RetinaNet model with a ResNet-50-FPN backbone.
@@ -619,36 +649,43 @@ def retinanet_resnet50_fpn(
Example::
>>> model = torchvision.models.detection.retinanet_resnet50_fpn(pretrained=True)
>>> model = torchvision.models.detection.retinanet_resnet50_fpn(weights=RetinaNet_ResNet50_FPN_Weights.DEFAULT)
>>> model.eval()
>>> x = [torch.rand(3, 300, 400), torch.rand(3, 500, 400)]
>>> predictions = model(x)
Args:
pretrained (bool): If True, returns a model pre-trained on COCO train2017
weights (RetinaNet_ResNet50_FPN_Weights, optional): The pretrained weights for the model
progress (bool): If True, displays a progress bar of the download to stderr
num_classes (int): number of output classes of the model (including the background)
pretrained_backbone (bool): If True, returns a model with backbone pre-trained on Imagenet
trainable_backbone_layers (int): number of trainable (not frozen) resnet layers starting from final block.
num_classes (int, optional): number of output classes of the model (including the background)
weights_backbone (ResNet50_Weights, optional): The pretrained weights for the backbone
trainable_backbone_layers (int, optional): number of trainable (not frozen) layers starting from final block.
Valid values are between 0 and 5, with 5 meaning all backbone layers are trainable. If ``None`` is
passed (the default) this value is set to 3.
"""
is_trained = pretrained or pretrained_backbone
weights = RetinaNet_ResNet50_FPN_Weights.verify(weights)
weights_backbone = ResNet50_Weights.verify(weights_backbone)
if weights is not None:
weights_backbone = None
num_classes = _ovewrite_value_param(num_classes, len(weights.meta["categories"]))
elif num_classes is None:
num_classes = 91
is_trained = weights is not None or weights_backbone is not None
trainable_backbone_layers = _validate_trainable_layers(is_trained, trainable_backbone_layers, 5, 3)
norm_layer = misc_nn_ops.FrozenBatchNorm2d if is_trained else nn.BatchNorm2d
if pretrained:
# no need to download the backbone if pretrained is set
pretrained_backbone = False
backbone = resnet50(pretrained=pretrained_backbone, progress=progress, norm_layer=norm_layer)
backbone = resnet50(weights=weights_backbone, progress=progress, norm_layer=norm_layer)
# skip P2 because it generates too many anchors (according to their paper)
backbone = _resnet_fpn_extractor(
backbone, trainable_backbone_layers, returned_layers=[2, 3, 4], extra_blocks=LastLevelP6P7(256, 256)
)
model = RetinaNet(backbone, num_classes, **kwargs)
if pretrained:
state_dict = load_state_dict_from_url(model_urls["retinanet_resnet50_fpn_coco"], progress=progress)
model.load_state_dict(state_dict)
overwrite_eps(model, 0.0)
if weights is not None:
model.load_state_dict(weights.get_state_dict(progress=progress))
if weights == RetinaNet_ResNet50_FPN_Weights.COCO_V1:
overwrite_eps(model, 0.0)
return model
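
For completeness, what the retinanet.py builder above does once weights are resolved, sketched from the diff itself:

from torchvision.models.detection import RetinaNet_ResNet50_FPN_Weights, retinanet_resnet50_fpn

weights = RetinaNet_ResNet50_FPN_Weights.COCO_V1
print(weights.meta["num_params"], weights.meta["map"])  # 34014999 params, 36.4 mAP

# Internally the builder fetches the checkpoint via
# weights.get_state_dict(progress=...) and, for this specific checkpoint,
# calls overwrite_eps(model, 0.0) to keep the historical FrozenBatchNorm2d
# evaluation behavior.
model = retinanet_resnet50_fpn(weights=weights)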
@@ -4,8 +4,7 @@ import torch
import torch.nn.functional as F
import torchvision
from torch import nn, Tensor
from torchvision.ops import boxes as box_ops
from torchvision.ops import roi_align
from torchvision.ops import boxes as box_ops, roi_align
from . import _utils as det_utils
@@ -6,27 +6,42 @@ import torch
import torch.nn.functional as F
from torch import nn, Tensor
from ..._internally_replaced_utils import load_state_dict_from_url
from ...ops import boxes as box_ops
from ...transforms._presets import ObjectDetection, InterpolationMode
from ...utils import _log_api_usage_once
from .. import vgg
from .._api import WeightsEnum, Weights
from .._meta import _COCO_CATEGORIES
from .._utils import handle_legacy_interface, _ovewrite_value_param
from ..vgg import VGG, VGG16_Weights, vgg16
from . import _utils as det_utils
from .anchor_utils import DefaultBoxGenerator
from .backbone_utils import _validate_trainable_layers
from .transform import GeneralizedRCNNTransform
__all__ = ["SSD", "ssd300_vgg16"]
model_urls = {
"ssd300_vgg16_coco": "https://download.pytorch.org/models/ssd300_vgg16_coco-b556d3b4.pth",
}
backbone_urls = {
# We port the features of a VGG16 backbone trained by amdegroot because unlike the one on TorchVision, it uses the
# same input standardization method as the paper. Ref: https://s3.amazonaws.com/amdegroot-models/vgg16_reducedfc.pth
# Only the `features` weights have proper values, those on the `classifier` module are filled with nans.
"vgg16_features": "https://download.pytorch.org/models/vgg16_features-amdegroot-88682ab5.pth"
}
__all__ = [
"SSD300_VGG16_Weights",
"ssd300_vgg16",
]
class SSD300_VGG16_Weights(WeightsEnum):
COCO_V1 = Weights(
url="https://download.pytorch.org/models/ssd300_vgg16_coco-b556d3b4.pth",
transforms=ObjectDetection,
meta={
"task": "image_object_detection",
"architecture": "SSD",
"publication_year": 2015,
"num_params": 35641826,
"size": (300, 300),
"categories": _COCO_CATEGORIES,
"interpolation": InterpolationMode.BILINEAR,
"recipe": "https://github.com/pytorch/vision/tree/main/references/detection#ssd300-vgg16",
"map": 25.1,
},
)
DEFAULT = COCO_V1
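Each enum entry bundles the checkpoint URL, the preprocessing preset and the metadata in one handle, so callers can recover all of them without extra lookups; a small sketch:

weights = SSD300_VGG16_Weights.DEFAULT   # alias of COCO_V1, per the definition above
preprocess = weights.transforms()        # instantiates the bundled ObjectDetection preset
print(weights.meta["map"])               # 25.1, the COCO mAP recorded in the meta dict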
def _xavier_init(conv: nn.Module):
@@ -528,7 +543,7 @@ class SSDFeatureExtractorVGG(nn.Module):
return OrderedDict([(str(i), v) for i, v in enumerate(output)])
def _vgg_extractor(backbone: vgg.VGG, highres: bool, trainable_layers: int):
def _vgg_extractor(backbone: VGG, highres: bool, trainable_layers: int):
backbone = backbone.features
# Gather the indices of maxpools. These are the locations of output blocks.
stage_indices = [0] + [i for i, b in enumerate(backbone) if isinstance(b, nn.MaxPool2d)][:-1]
@@ -546,14 +561,19 @@ def _vgg_extractor(backbone: vgg.VGG, highres: bool, trainable_layers: int):
return SSDFeatureExtractorVGG(backbone, highres)
@handle_legacy_interface(
weights=("pretrained", SSD300_VGG16_Weights.COCO_V1),
weights_backbone=("pretrained_backbone", VGG16_Weights.IMAGENET1K_FEATURES),
)
def ssd300_vgg16(
pretrained: bool = False,
*,
weights: Optional[SSD300_VGG16_Weights] = None,
progress: bool = True,
num_classes: int = 91,
pretrained_backbone: bool = True,
num_classes: Optional[int] = None,
weights_backbone: Optional[VGG16_Weights] = VGG16_Weights.IMAGENET1K_FEATURES,
trainable_backbone_layers: Optional[int] = None,
**kwargs: Any,
):
) -> SSD:
"""Constructs an SSD model with input size 300x300 and a VGG16 backbone.
Reference: `"SSD: Single Shot MultiBox Detector" <https://arxiv.org/abs/1512.02325>`_.
@@ -585,37 +605,38 @@ def ssd300_vgg16(
Example:
>>> model = torchvision.models.detection.ssd300_vgg16(pretrained=True)
>>> model = torchvision.models.detection.ssd300_vgg16(weights=SSD300_VGG16_Weights.DEFAULT)
>>> model.eval()
>>> x = [torch.rand(3, 300, 300), torch.rand(3, 500, 400)]
>>> predictions = model(x)
Args:
pretrained (bool): If True, returns a model pre-trained on COCO train2017
weights (SSD300_VGG16_Weights, optional): The pretrained weights for the model
progress (bool): If True, displays a progress bar of the download to stderr
num_classes (int): number of output classes of the model (including the background)
pretrained_backbone (bool): If True, returns a model with backbone pre-trained on Imagenet
trainable_backbone_layers (int): number of trainable (not frozen) resnet layers starting from final block.
num_classes (int, optional): number of output classes of the model (including the background)
weights_backbone (VGG16_Weights, optional): The pretrained weights for the backbone
trainable_backbone_layers (int, optional): number of trainable (not frozen) layers starting from final block.
Valid values are between 0 and 5, with 5 meaning all backbone layers are trainable. If ``None`` is
passed (the default) this value is set to 4.
"""
weights = SSD300_VGG16_Weights.verify(weights)
weights_backbone = VGG16_Weights.verify(weights_backbone)
if "size" in kwargs:
warnings.warn("The size of the model is already fixed; ignoring the argument.")
warnings.warn("The size of the model is already fixed; ignoring the parameter.")
if weights is not None:
weights_backbone = None
num_classes = _ovewrite_value_param(num_classes, len(weights.meta["categories"]))
elif num_classes is None:
num_classes = 91
trainable_backbone_layers = _validate_trainable_layers(
pretrained or pretrained_backbone, trainable_backbone_layers, 5, 4
weights is not None or weights_backbone is not None, trainable_backbone_layers, 5, 4
)
if pretrained:
# no need to download the backbone if pretrained is set
pretrained_backbone = False
# Use custom backbones more appropriate for SSD
backbone = vgg.vgg16(pretrained=False, progress=progress)
if pretrained_backbone:
state_dict = load_state_dict_from_url(backbone_urls["vgg16_features"], progress=progress)
backbone.load_state_dict(state_dict)
backbone = vgg16(weights=weights_backbone, progress=progress)
backbone = _vgg_extractor(backbone, False, trainable_backbone_layers)
anchor_generator = DefaultBoxGenerator(
[[2], [2, 3], [2, 3], [2, 3], [2], [2]],
@@ -628,12 +649,10 @@ def ssd300_vgg16(
"image_mean": [0.48235, 0.45882, 0.40784],
"image_std": [1.0 / 255.0, 1.0 / 255.0, 1.0 / 255.0], # undo the 0-1 scaling of toTensor
}
kwargs = {**defaults, **kwargs}
kwargs: Any = {**defaults, **kwargs}
model = SSD(backbone, anchor_generator, (300, 300), num_classes, **kwargs)
if pretrained:
weights_name = "ssd300_vgg16_coco"
if model_urls.get(weights_name, None) is None:
raise ValueError(f"No checkpoint is available for model {weights_name}")
state_dict = load_state_dict_from_url(model_urls[weights_name], progress=progress)
model.load_state_dict(state_dict)
if weights is not None:
model.load_state_dict(weights.get_state_dict(progress=progress))
return model
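For reference, a sketch of the backward-compatibility path that `@handle_legacy_interface` provides for this builder; the legacy boolean flag is remapped to the pinned enum values declared in the decorator:

from torchvision.models.detection import ssd300_vgg16, SSD300_VGG16_Weights

legacy = ssd300_vgg16(pretrained=True)                        # emits a deprecation warning, then behaves like:
modern = ssd300_vgg16(weights=SSD300_VGG16_Weights.COCO_V1)   # the explicit, warning-free equivalent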
@@ -6,21 +6,24 @@ from typing import Any, Callable, Dict, List, Optional, Union
import torch
from torch import nn, Tensor
from ..._internally_replaced_utils import load_state_dict_from_url
from ...ops.misc import Conv2dNormActivation
from ...transforms._presets import ObjectDetection, InterpolationMode
from ...utils import _log_api_usage_once
from .. import mobilenet
from .._api import WeightsEnum, Weights
from .._meta import _COCO_CATEGORIES
from .._utils import handle_legacy_interface, _ovewrite_value_param
from ..mobilenetv3 import MobileNet_V3_Large_Weights, mobilenet_v3_large
from . import _utils as det_utils
from .anchor_utils import DefaultBoxGenerator
from .backbone_utils import _validate_trainable_layers
from .ssd import SSD, SSDScoringHead
__all__ = ["ssdlite320_mobilenet_v3_large"]
model_urls = {
"ssdlite320_mobilenet_v3_large_coco": "https://download.pytorch.org/models/ssdlite320_mobilenet_v3_large_coco-a79551df.pth"
}
__all__ = [
"SSDLite320_MobileNet_V3_Large_Weights",
"ssdlite320_mobilenet_v3_large",
]
# Building blocks of SSDlite as described in section 6.2 of the MobileNetV2 paper
@@ -181,15 +184,39 @@ def _mobilenet_extractor(
return SSDLiteFeatureExtractorMobileNet(backbone, stage_indices[-2], norm_layer)
class SSDLite320_MobileNet_V3_Large_Weights(WeightsEnum):
COCO_V1 = Weights(
url="https://download.pytorch.org/models/ssdlite320_mobilenet_v3_large_coco-a79551df.pth",
transforms=ObjectDetection,
meta={
"task": "image_object_detection",
"architecture": "SSDLite",
"publication_year": 2018,
"num_params": 3440060,
"size": (320, 320),
"categories": _COCO_CATEGORIES,
"interpolation": InterpolationMode.BILINEAR,
"recipe": "https://github.com/pytorch/vision/tree/main/references/detection#ssdlite320-mobilenetv3-large",
"map": 21.3,
},
)
DEFAULT = COCO_V1
@handle_legacy_interface(
weights=("pretrained", SSDLite320_MobileNet_V3_Large_Weights.COCO_V1),
weights_backbone=("pretrained_backbone", MobileNet_V3_Large_Weights.IMAGENET1K_V1),
)
def ssdlite320_mobilenet_v3_large(
pretrained: bool = False,
*,
weights: Optional[SSDLite320_MobileNet_V3_Large_Weights] = None,
progress: bool = True,
num_classes: int = 91,
pretrained_backbone: bool = False,
num_classes: Optional[int] = None,
weights_backbone: Optional[MobileNet_V3_Large_Weights] = MobileNet_V3_Large_Weights.IMAGENET1K_V1,
trainable_backbone_layers: Optional[int] = None,
norm_layer: Optional[Callable[..., nn.Module]] = None,
**kwargs: Any,
):
) -> SSD:
"""Constructs an SSDlite model with input size 320x320 and a MobileNetV3 Large backbone, as described at
`"Searching for MobileNetV3"
<https://arxiv.org/abs/1905.02244>`_ and
@@ -200,41 +227,47 @@ def ssdlite320_mobilenet_v3_large(
Example:
>>> model = torchvision.models.detection.ssdlite320_mobilenet_v3_large(pretrained=True)
>>> model = torchvision.models.detection.ssdlite320_mobilenet_v3_large(weights=SSDLite320_MobileNet_V3_Large_Weights.DEFAULT)
>>> model.eval()
>>> x = [torch.rand(3, 320, 320), torch.rand(3, 500, 400)]
>>> predictions = model(x)
Args:
pretrained (bool): If True, returns a model pre-trained on COCO train2017
weights (SSDLite320_MobileNet_V3_Large_Weights, optional): The pretrained weights for the model
progress (bool): If True, displays a progress bar of the download to stderr
num_classes (int): number of output classes of the model (including the background)
pretrained_backbone (bool): If True, returns a model with backbone pre-trained on Imagenet
trainable_backbone_layers (int): number of trainable (not frozen) resnet layers starting from final block.
num_classes (int, optional): number of output classes of the model (including the background)
weights_backbone (MobileNet_V3_Large_Weights, optional): The pretrained weights for the backbone
trainable_backbone_layers (int, optional): number of trainable (not frozen) layers starting from final block.
Valid values are between 0 and 6, with 6 meaning all backbone layers are trainable. If ``None`` is
passed (the default) this value is set to 6.
norm_layer (callable, optional): Module specifying the normalization layer to use.
"""
weights = SSDLite320_MobileNet_V3_Large_Weights.verify(weights)
weights_backbone = MobileNet_V3_Large_Weights.verify(weights_backbone)
if "size" in kwargs:
warnings.warn("The size of the model is already fixed; ignoring the argument.")
warnings.warn("The size of the model is already fixed; ignoring the parameter.")
if weights is not None:
weights_backbone = None
num_classes = _ovewrite_value_param(num_classes, len(weights.meta["categories"]))
elif num_classes is None:
num_classes = 91
trainable_backbone_layers = _validate_trainable_layers(
pretrained or pretrained_backbone, trainable_backbone_layers, 6, 6
weights is not None or weights_backbone is not None, trainable_backbone_layers, 6, 6
)
if pretrained:
pretrained_backbone = False
# Enable reduced tail if no pretrained backbone is selected. See Table 6 of MobileNetV3 paper.
reduce_tail = not pretrained_backbone
reduce_tail = weights_backbone is None
if norm_layer is None:
norm_layer = partial(nn.BatchNorm2d, eps=0.001, momentum=0.03)
backbone = mobilenet.mobilenet_v3_large(
pretrained=pretrained_backbone, progress=progress, norm_layer=norm_layer, reduced_tail=reduce_tail, **kwargs
backbone = mobilenet_v3_large(
weights=weights_backbone, progress=progress, norm_layer=norm_layer, reduced_tail=reduce_tail, **kwargs
)
if not pretrained_backbone:
if weights_backbone is None:
# Change the default initialization scheme if not pretrained
_normal_init(backbone)
backbone = _mobilenet_extractor(
@@ -262,7 +295,7 @@ def ssdlite320_mobilenet_v3_large(
"image_mean": [0.5, 0.5, 0.5],
"image_std": [0.5, 0.5, 0.5],
}
kwargs = {**defaults, **kwargs}
kwargs: Any = {**defaults, **kwargs}
model = SSD(
backbone,
anchor_generator,
@@ -272,10 +305,7 @@
**kwargs,
)
if pretrained:
weights_name = "ssdlite320_mobilenet_v3_large_coco"
if model_urls.get(weights_name, None) is None:
raise ValueError(f"No checkpoint is available for model {weights_name}")
state_dict = load_state_dict_from_url(model_urls[weights_name], progress=progress)
model.load_state_dict(state_dict)
if weights is not None:
model.load_state_dict(weights.get_state_dict(progress=progress))
return model
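The split between `weights` and `weights_backbone` also enables the common fine-tuning setup where only the backbone is pretrained; a hedged sketch (the VOC-style num_classes is purely illustrative):

from torchvision.models import MobileNet_V3_Large_Weights
from torchvision.models.detection import ssdlite320_mobilenet_v3_large

model = ssdlite320_mobilenet_v3_large(
    weights=None,                                                 # detection head trained from scratch
    weights_backbone=MobileNet_V3_Large_Weights.IMAGENET1K_V1,    # ImageNet-pretrained backbone
    num_classes=21,                                               # e.g. 20 classes + background
)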
import warnings
from collections import namedtuple
from functools import partial
from typing import Optional, Tuple, List, Callable, Any
import torch
@@ -7,15 +8,15 @@ import torch.nn as nn
import torch.nn.functional as F
from torch import Tensor
from .._internally_replaced_utils import load_state_dict_from_url
from ..transforms._presets import ImageClassification, InterpolationMode
from ..utils import _log_api_usage_once
from ._api import WeightsEnum, Weights
from ._meta import _IMAGENET_CATEGORIES
from ._utils import handle_legacy_interface, _ovewrite_named_param
__all__ = ["GoogLeNet", "googlenet", "GoogLeNetOutputs", "_GoogLeNetOutputs"]
model_urls = {
# GoogLeNet ported from TensorFlow
"googlenet": "https://download.pytorch.org/models/googlenet-1378be20.pth",
}
__all__ = ["GoogLeNet", "GoogLeNetOutputs", "_GoogLeNetOutputs", "GoogLeNet_Weights", "googlenet"]
GoogLeNetOutputs = namedtuple("GoogLeNetOutputs", ["logits", "aux_logits2", "aux_logits1"])
GoogLeNetOutputs.__annotations__ = {"logits": Tensor, "aux_logits2": Optional[Tensor], "aux_logits1": Optional[Tensor]}
@@ -274,38 +275,62 @@ class BasicConv2d(nn.Module):
return F.relu(x, inplace=True)
def googlenet(pretrained: bool = False, progress: bool = True, **kwargs: Any) -> GoogLeNet:
class GoogLeNet_Weights(WeightsEnum):
IMAGENET1K_V1 = Weights(
url="https://download.pytorch.org/models/googlenet-1378be20.pth",
transforms=partial(ImageClassification, crop_size=224),
meta={
"task": "image_classification",
"architecture": "GoogLeNet",
"publication_year": 2014,
"num_params": 6624904,
"size": (224, 224),
"min_size": (15, 15),
"categories": _IMAGENET_CATEGORIES,
"interpolation": InterpolationMode.BILINEAR,
"recipe": "https://github.com/pytorch/vision/tree/main/references/classification#googlenet",
"acc@1": 69.778,
"acc@5": 89.530,
},
)
DEFAULT = IMAGENET1K_V1
@handle_legacy_interface(weights=("pretrained", GoogLeNet_Weights.IMAGENET1K_V1))
def googlenet(*, weights: Optional[GoogLeNet_Weights] = None, progress: bool = True, **kwargs: Any) -> GoogLeNet:
r"""GoogLeNet (Inception v1) model architecture from
`"Going Deeper with Convolutions" <http://arxiv.org/abs/1409.4842>`_.
The required minimum input size of the model is 15x15.
Args:
pretrained (bool): If True, returns a model pre-trained on ImageNet
weights (GoogLeNet_Weights, optional): The pretrained weights for the model
progress (bool): If True, displays a progress bar of the download to stderr
aux_logits (bool): If True, adds two auxiliary branches that can improve training.
Default: *False* when pretrained is True otherwise *True*
transform_input (bool): If True, preprocesses the input according to the method with which it
was trained on ImageNet. Default: True if ``pretrained=True``, else False.
was trained on ImageNet. Default: True if ``weights=GoogLeNet_Weights.IMAGENET1K_V1``, else False.
"""
if pretrained:
weights = GoogLeNet_Weights.verify(weights)
original_aux_logits = kwargs.get("aux_logits", False)
if weights is not None:
if "transform_input" not in kwargs:
kwargs["transform_input"] = True
if "aux_logits" not in kwargs:
kwargs["aux_logits"] = False
if kwargs["aux_logits"]:
warnings.warn(
"auxiliary heads in the pretrained googlenet model are NOT pretrained, so make sure to train them"
)
original_aux_logits = kwargs["aux_logits"]
kwargs["aux_logits"] = True
kwargs["init_weights"] = False
model = GoogLeNet(**kwargs)
state_dict = load_state_dict_from_url(model_urls["googlenet"], progress=progress)
model.load_state_dict(state_dict)
_ovewrite_named_param(kwargs, "transform_input", True)
_ovewrite_named_param(kwargs, "aux_logits", True)
_ovewrite_named_param(kwargs, "init_weights", False)
_ovewrite_named_param(kwargs, "num_classes", len(weights.meta["categories"]))
model = GoogLeNet(**kwargs)
if weights is not None:
model.load_state_dict(weights.get_state_dict(progress=progress))
if not original_aux_logits:
model.aux_logits = False
model.aux1 = None # type: ignore[assignment]
model.aux2 = None # type: ignore[assignment]
return model
else:
warnings.warn(
"auxiliary heads in the pretrained googlenet model are NOT pretrained, so make sure to train them"
)
return GoogLeNet(**kwargs)
return model
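A short sketch of the aux-heads behavior implemented above; the heads are stripped unless explicitly requested, matching the legacy semantics:

from torchvision.models import googlenet, GoogLeNet_Weights

m = googlenet(weights=GoogLeNet_Weights.IMAGENET1K_V1)
assert m.aux_logits is False and m.aux1 is None   # aux branches removed after loading

# Requesting the heads keeps them, but triggers the "NOT pretrained" warning above.
m_aux = googlenet(weights=GoogLeNet_Weights.IMAGENET1K_V1, aux_logits=True)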
import warnings
from collections import namedtuple
from functools import partial
from typing import Callable, Any, Optional, Tuple, List
import torch
import torch.nn.functional as F
from torch import nn, Tensor
from .._internally_replaced_utils import load_state_dict_from_url
from ..transforms._presets import ImageClassification, InterpolationMode
from ..utils import _log_api_usage_once
from ._api import WeightsEnum, Weights
from ._meta import _IMAGENET_CATEGORIES
from ._utils import handle_legacy_interface, _ovewrite_named_param
__all__ = ["Inception3", "inception_v3", "InceptionOutputs", "_InceptionOutputs"]
__all__ = ["Inception3", "InceptionOutputs", "_InceptionOutputs", "Inception_V3_Weights", "inception_v3"]
model_urls = {
# Inception v3 ported from TensorFlow
"inception_v3_google": "https://download.pytorch.org/models/inception_v3_google-0cc3c7bd.pth",
}
InceptionOutputs = namedtuple("InceptionOutputs", ["logits", "aux_logits"])
InceptionOutputs.__annotations__ = {"logits": Tensor, "aux_logits": Optional[Tensor]}
@@ -408,7 +407,29 @@ class BasicConv2d(nn.Module):
return F.relu(x, inplace=True)
def inception_v3(pretrained: bool = False, progress: bool = True, **kwargs: Any) -> Inception3:
class Inception_V3_Weights(WeightsEnum):
IMAGENET1K_V1 = Weights(
url="https://download.pytorch.org/models/inception_v3_google-0cc3c7bd.pth",
transforms=partial(ImageClassification, crop_size=299, resize_size=342),
meta={
"task": "image_classification",
"architecture": "InceptionV3",
"publication_year": 2015,
"num_params": 27161264,
"size": (299, 299),
"min_size": (75, 75),
"categories": _IMAGENET_CATEGORIES,
"interpolation": InterpolationMode.BILINEAR,
"recipe": "https://github.com/pytorch/vision/tree/main/references/classification#inception-v3",
"acc@1": 77.294,
"acc@5": 93.450,
},
)
DEFAULT = IMAGENET1K_V1
@handle_legacy_interface(weights=("pretrained", Inception_V3_Weights.IMAGENET1K_V1))
def inception_v3(*, weights: Optional[Inception_V3_Weights] = None, progress: bool = True, **kwargs: Any) -> Inception3:
r"""Inception v3 model architecture from
`"Rethinking the Inception Architecture for Computer Vision" <http://arxiv.org/abs/1512.00567>`_.
The required minimum input size of the model is 75x75.
@@ -418,28 +439,29 @@ def inception_v3(
N x 3 x 299 x 299, so ensure your images are sized accordingly.
Args:
pretrained (bool): If True, returns a model pre-trained on ImageNet
weights (Inception_V3_Weights, optional): The pretrained weights for the model
progress (bool): If True, displays a progress bar of the download to stderr
aux_logits (bool): If True, add an auxiliary branch that can improve training.
Default: *True*
transform_input (bool): If True, preprocesses the input according to the method with which it
was trained on ImageNet. Default: True if ``pretrained=True``, else False.
was trained on ImageNet. Default: True if ``weights=Inception_V3_Weights.IMAGENET1K_V1``, else False.
"""
if pretrained:
weights = Inception_V3_Weights.verify(weights)
original_aux_logits = kwargs.get("aux_logits", True)
if weights is not None:
if "transform_input" not in kwargs:
kwargs["transform_input"] = True
if "aux_logits" in kwargs:
original_aux_logits = kwargs["aux_logits"]
kwargs["aux_logits"] = True
else:
original_aux_logits = True
kwargs["init_weights"] = False # we are loading weights from a pretrained model
model = Inception3(**kwargs)
state_dict = load_state_dict_from_url(model_urls["inception_v3_google"], progress=progress)
model.load_state_dict(state_dict)
_ovewrite_named_param(kwargs, "transform_input", True)
_ovewrite_named_param(kwargs, "aux_logits", True)
_ovewrite_named_param(kwargs, "init_weights", False)
_ovewrite_named_param(kwargs, "num_classes", len(weights.meta["categories"]))
model = Inception3(**kwargs)
if weights is not None:
model.load_state_dict(weights.get_state_dict(progress=progress))
if not original_aux_logits:
model.aux_logits = False
model.AuxLogits = None
return model
return Inception3(**kwargs)
return model
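For reference, the `_ovewrite_named_param` guard used throughout these classification builders behaves roughly as below; this is a sketch of the assumed semantics, the canonical implementation lives in `torchvision/models/_utils.py`:

from typing import Any, Dict

def _ovewrite_named_param_sketch(kwargs: Dict[str, Any], param: str, new_value: Any) -> None:
    # An explicitly passed kwarg is accepted only if it agrees with the value
    # implied by the selected weights; otherwise the mismatch is surfaced loudly.
    if param in kwargs:
        if kwargs[param] != new_value:
            raise ValueError(f"The parameter '{param}' expected value {new_value} but got {kwargs[param]} instead.")
    else:
        kwargs[param] = new_value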
import warnings
from typing import Any, Dict, List
from functools import partial
from typing import Any, Dict, List, Optional
import torch
import torch.nn as nn
from torch import Tensor
from .._internally_replaced_utils import load_state_dict_from_url
from ..transforms._presets import ImageClassification, InterpolationMode
from ..utils import _log_api_usage_once
from ._api import WeightsEnum, Weights
from ._meta import _IMAGENET_CATEGORIES
from ._utils import handle_legacy_interface, _ovewrite_named_param
__all__ = ["MNASNet", "mnasnet0_5", "mnasnet0_75", "mnasnet1_0", "mnasnet1_3"]
_MODEL_URLS = {
"mnasnet0_5": "https://download.pytorch.org/models/mnasnet0.5_top1_67.823-3ffadce67e.pth",
"mnasnet0_75": None,
"mnasnet1_0": "https://download.pytorch.org/models/mnasnet1.0_top1_73.512-f206786ef8.pth",
"mnasnet1_3": None,
}
__all__ = [
"MNASNet",
"MNASNet0_5_Weights",
"MNASNet0_75_Weights",
"MNASNet1_0_Weights",
"MNASNet1_3_Weights",
"mnasnet0_5",
"mnasnet0_75",
"mnasnet1_0",
"mnasnet1_3",
]
# Paper suggests 0.9997 momentum, for TensorFlow. Equivalent PyTorch momentum is
# 1.0 - tensorflow.
@@ -202,68 +211,123 @@ class MNASNet(torch.nn.Module):
)
def _load_pretrained(model_name: str, model: nn.Module, progress: bool) -> None:
if model_name not in _MODEL_URLS or _MODEL_URLS[model_name] is None:
raise ValueError(f"No checkpoint is available for model type {model_name}")
checkpoint_url = _MODEL_URLS[model_name]
model.load_state_dict(load_state_dict_from_url(checkpoint_url, progress=progress))
_COMMON_META = {
"task": "image_classification",
"architecture": "MNASNet",
"publication_year": 2018,
"size": (224, 224),
"min_size": (1, 1),
"categories": _IMAGENET_CATEGORIES,
"interpolation": InterpolationMode.BILINEAR,
"recipe": "https://github.com/1e100/mnasnet_trainer",
}
class MNASNet0_5_Weights(WeightsEnum):
IMAGENET1K_V1 = Weights(
url="https://download.pytorch.org/models/mnasnet0.5_top1_67.823-3ffadce67e.pth",
transforms=partial(ImageClassification, crop_size=224),
meta={
**_COMMON_META,
"num_params": 2218512,
"acc@1": 67.734,
"acc@5": 87.490,
},
)
DEFAULT = IMAGENET1K_V1
class MNASNet0_75_Weights(WeightsEnum):
# If a default model is added here the corresponding changes need to be done in mnasnet0_75
pass
class MNASNet1_0_Weights(WeightsEnum):
IMAGENET1K_V1 = Weights(
url="https://download.pytorch.org/models/mnasnet1.0_top1_73.512-f206786ef8.pth",
transforms=partial(ImageClassification, crop_size=224),
meta={
**_COMMON_META,
"num_params": 4383312,
"acc@1": 73.456,
"acc@5": 91.510,
},
)
DEFAULT = IMAGENET1K_V1
class MNASNet1_3_Weights(WeightsEnum):
# If a default model is added here the corresponding changes need to be done in mnasnet1_3
pass
def _mnasnet(alpha: float, weights: Optional[WeightsEnum], progress: bool, **kwargs: Any) -> MNASNet:
if weights is not None:
_ovewrite_named_param(kwargs, "num_classes", len(weights.meta["categories"]))
model = MNASNet(alpha, **kwargs)
def mnasnet0_5(pretrained: bool = False, progress: bool = True, **kwargs: Any) -> MNASNet:
if weights:
model.load_state_dict(weights.get_state_dict(progress=progress))
return model
@handle_legacy_interface(weights=("pretrained", MNASNet0_5_Weights.IMAGENET1K_V1))
def mnasnet0_5(*, weights: Optional[MNASNet0_5_Weights] = None, progress: bool = True, **kwargs: Any) -> MNASNet:
r"""MNASNet with depth multiplier of 0.5 from
`"MnasNet: Platform-Aware Neural Architecture Search for Mobile"
<https://arxiv.org/pdf/1807.11626.pdf>`_.
Args:
pretrained (bool): If True, returns a model pre-trained on ImageNet
weights (MNASNet0_5_Weights, optional): The pretrained weights for the model
progress (bool): If True, displays a progress bar of the download to stderr
"""
model = MNASNet(0.5, **kwargs)
if pretrained:
_load_pretrained("mnasnet0_5", model, progress)
return model
weights = MNASNet0_5_Weights.verify(weights)
return _mnasnet(0.5, weights, progress, **kwargs)
def mnasnet0_75(pretrained: bool = False, progress: bool = True, **kwargs: Any) -> MNASNet:
@handle_legacy_interface(weights=("pretrained", None))
def mnasnet0_75(*, weights: Optional[MNASNet0_75_Weights] = None, progress: bool = True, **kwargs: Any) -> MNASNet:
r"""MNASNet with depth multiplier of 0.75 from
`"MnasNet: Platform-Aware Neural Architecture Search for Mobile"
<https://arxiv.org/pdf/1807.11626.pdf>`_.
Args:
pretrained (bool): If True, returns a model pre-trained on ImageNet
weights (MNASNet0_75_Weights, optional): The pretrained weights for the model
progress (bool): If True, displays a progress bar of the download to stderr
"""
model = MNASNet(0.75, **kwargs)
if pretrained:
_load_pretrained("mnasnet0_75", model, progress)
return model
weights = MNASNet0_75_Weights.verify(weights)
return _mnasnet(0.75, weights, progress, **kwargs)
def mnasnet1_0(pretrained: bool = False, progress: bool = True, **kwargs: Any) -> MNASNet:
@handle_legacy_interface(weights=("pretrained", MNASNet1_0_Weights.IMAGENET1K_V1))
def mnasnet1_0(*, weights: Optional[MNASNet1_0_Weights] = None, progress: bool = True, **kwargs: Any) -> MNASNet:
r"""MNASNet with depth multiplier of 1.0 from
`"MnasNet: Platform-Aware Neural Architecture Search for Mobile"
<https://arxiv.org/pdf/1807.11626.pdf>`_.
Args:
pretrained (bool): If True, returns a model pre-trained on ImageNet
weights (MNASNet1_0_Weights, optional): The pretrained weights for the model
progress (bool): If True, displays a progress bar of the download to stderr
"""
model = MNASNet(1.0, **kwargs)
if pretrained:
_load_pretrained("mnasnet1_0", model, progress)
return model
weights = MNASNet1_0_Weights.verify(weights)
return _mnasnet(1.0, weights, progress, **kwargs)
def mnasnet1_3(pretrained: bool = False, progress: bool = True, **kwargs: Any) -> MNASNet:
@handle_legacy_interface(weights=("pretrained", None))
def mnasnet1_3(*, weights: Optional[MNASNet1_3_Weights] = None, progress: bool = True, **kwargs: Any) -> MNASNet:
r"""MNASNet with depth multiplier of 1.3 from
`"MnasNet: Platform-Aware Neural Architecture Search for Mobile"
<https://arxiv.org/pdf/1807.11626.pdf>`_.
Args:
pretrained (bool): If True, returns a model pre-trained on ImageNet
weights (MNASNet1_3_Weights, optional): The pretrained weights for the model
progress (bool): If True, displays a progress bar of the download to stderr
"""
model = MNASNet(1.3, **kwargs)
if pretrained:
_load_pretrained("mnasnet1_3", model, progress)
return model
weights = MNASNet1_3_Weights.verify(weights)
return _mnasnet(1.3, weights, progress, **kwargs)
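Since two of the four width multipliers still ship without checkpoints, their enums stay empty and only a default-initialized model is obtainable; a quick sketch:

from torchvision.models import mnasnet0_5, mnasnet0_75, MNASNet0_5_Weights

pretrained = mnasnet0_5(weights=MNASNet0_5_Weights.IMAGENET1K_V1)
scratch = mnasnet0_75()   # MNASNet0_75_Weights has no entries, so no weights can be passed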
from .mobilenetv2 import MobileNetV2, mobilenet_v2, __all__ as mv2_all
from .mobilenetv3 import MobileNetV3, mobilenet_v3_large, mobilenet_v3_small, __all__ as mv3_all
from .mobilenetv2 import * # noqa: F401, F403
from .mobilenetv3 import * # noqa: F401, F403
from .mobilenetv2 import __all__ as mv2_all
from .mobilenetv3 import __all__ as mv3_all
__all__ = mv2_all + mv3_all
import warnings
from functools import partial
from typing import Callable, Any, Optional, List
import torch
from torch import Tensor
from torch import nn
from .._internally_replaced_utils import load_state_dict_from_url
from ..ops.misc import Conv2dNormActivation
from ..transforms._presets import ImageClassification, InterpolationMode
from ..utils import _log_api_usage_once
from ._utils import _make_divisible
from ._api import WeightsEnum, Weights
from ._meta import _IMAGENET_CATEGORIES
from ._utils import handle_legacy_interface, _ovewrite_named_param, _make_divisible
__all__ = ["MobileNetV2", "mobilenet_v2"]
model_urls = {
"mobilenet_v2": "https://download.pytorch.org/models/mobilenet_v2-b0353104.pth",
}
__all__ = ["MobileNetV2", "MobileNet_V2_Weights", "mobilenet_v2"]
# necessary for backwards compatibility
@@ -196,17 +194,62 @@ class MobileNetV2(nn.Module):
return self._forward_impl(x)
def mobilenet_v2(pretrained: bool = False, progress: bool = True, **kwargs: Any) -> MobileNetV2:
_COMMON_META = {
"task": "image_classification",
"architecture": "MobileNetV2",
"publication_year": 2018,
"num_params": 3504872,
"size": (224, 224),
"min_size": (1, 1),
"categories": _IMAGENET_CATEGORIES,
"interpolation": InterpolationMode.BILINEAR,
}
class MobileNet_V2_Weights(WeightsEnum):
IMAGENET1K_V1 = Weights(
url="https://download.pytorch.org/models/mobilenet_v2-b0353104.pth",
transforms=partial(ImageClassification, crop_size=224),
meta={
**_COMMON_META,
"recipe": "https://github.com/pytorch/vision/tree/main/references/classification#mobilenetv2",
"acc@1": 71.878,
"acc@5": 90.286,
},
)
IMAGENET1K_V2 = Weights(
url="https://download.pytorch.org/models/mobilenet_v2-7ebf99e0.pth",
transforms=partial(ImageClassification, crop_size=224, resize_size=232),
meta={
**_COMMON_META,
"recipe": "https://github.com/pytorch/vision/issues/3995#new-recipe-with-reg-tuning",
"acc@1": 72.154,
"acc@5": 90.822,
},
)
DEFAULT = IMAGENET1K_V2
@handle_legacy_interface(weights=("pretrained", MobileNet_V2_Weights.IMAGENET1K_V1))
def mobilenet_v2(
*, weights: Optional[MobileNet_V2_Weights] = None, progress: bool = True, **kwargs: Any
) -> MobileNetV2:
"""
Constructs a MobileNetV2 architecture from
`"MobileNetV2: Inverted Residuals and Linear Bottlenecks" <https://arxiv.org/abs/1801.04381>`_.
Args:
pretrained (bool): If True, returns a model pre-trained on ImageNet
weights (MobileNet_V2_Weights, optional): The pretrained weights for the model
progress (bool): If True, displays a progress bar of the download to stderr
"""
weights = MobileNet_V2_Weights.verify(weights)
if weights is not None:
_ovewrite_named_param(kwargs, "num_classes", len(weights.meta["categories"]))
model = MobileNetV2(**kwargs)
if pretrained:
state_dict = load_state_dict_from_url(model_urls["mobilenet_v2"], progress=progress)
model.load_state_dict(state_dict)
if weights is not None:
model.load_state_dict(weights.get_state_dict(progress=progress))
return model
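Here the DEFAULT alias and the legacy pin diverge on purpose: new callers get the improved V2 recipe, while `pretrained=True` keeps its historical behavior. A sketch of the result:

from torchvision.models import mobilenet_v2, MobileNet_V2_Weights

best = mobilenet_v2(weights=MobileNet_V2_Weights.DEFAULT)   # IMAGENET1K_V2, 72.154 acc@1
legacy = mobilenet_v2(pretrained=True)                      # warns, remaps to IMAGENET1K_V1 for BC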
from .raft import RAFT, raft_large, raft_small
from .raft import *