Unverified Commit 11bd2eaa authored by Vasilis Vryniotis, committed by GitHub

Port Multi-weight support from prototype to main (#5618)



* Moving basefiles outside of prototype and porting Alexnet, ConvNext, Densenet and EfficientNet.

* Porting googlenet

* Porting inception

* Porting mnasnet

* Porting mobilenetv2

* Porting mobilenetv3

* Porting regnet

* Porting resnet

* Porting shufflenetv2

* Porting squeezenet

* Porting vgg

* Porting vit

* Fix docstrings

* Fixing imports

* Adding missing import

* Fix mobilenet imports

* Fix tests

* Fix prototype tests

* Exclude get_weight from models on test

* Fix init files

* Porting googlenet

* Porting inception

* porting mobilenetv2

* porting mobilenetv3

* porting resnet

* porting shufflenetv2

* Fix test and linter

* Fixing docs.

* Porting Detection models (#5617)

* fix inits

* fix docs

* Port faster_rcnn

* Port fcos

* Port keypoint_rcnn

* Port mask_rcnn

* Port retinanet

* Port ssd

* Port ssdlite

* Fix linter

* Fixing tests

* Fixing tests

* Fixing vgg test

* Porting Optical Flow, Segmentation, Video models (#5619)

* Porting raft

* Porting video resnet

* Porting deeplabv3

* Porting fcn and lraspp

* Fixing the tests and linter

* Porting docs, examples, tutorials and galleries (#5620)

* Fix examples, tutorials and gallery

* Update gallery/plot_optical_flow.py
Co-authored-by: Nicolas Hug <contact@nicolas-hug.com>

* Fix import

* Revert hardcoded normalization

* fix uncommitted changes

* Fix bug

* Fix more bugs

* Making resize optional for segmentation

* Fixing preset

* Fix mypy

* Fixing documentation strings

* Fix flake8

* minor refactoring
Co-authored-by: Nicolas Hug <contact@nicolas-hug.com>

* Resolve conflict

* Porting model tests (#5622)

* Porting tests

* Remove unnecessary variable

* Fix linter

* Move prototype to extended tests

* Fix download models job

* Update CI on Multiweight branch to use the new weight download approach (#5628)

* port Pad to prototype transforms (#5621)

* port Pad to prototype transforms

* use literal

* Bump up LibTorchvision version number for Podspec to release Cocoapods (#5624)
Co-authored-by: Anton Thomma <anton@pri.co.nz>
Co-authored-by: Vasilis Vryniotis <datumbox@users.noreply.github.com>

* pre-download model weights in CI docs build (#5625)

* pre-download model weights in CI docs build

* move changes into template

* change docs image

* Regenerated config.yml
Co-authored-by: Philip Meier <github.pmeier@posteo.de>
Co-authored-by: Anton Thomma <11010310+thommaa@users.noreply.github.com>
Co-authored-by: Anton Thomma <anton@pri.co.nz>

* Porting reference scripts and updating presets (#5629)

* Making _preset.py classes

* Remove support of targets on presets.

* Rewriting the video preset

* Adding tests to check that the bundled transforms are JIT scriptable

* Rename all presets from *Eval to *Inference

* Minor refactoring

* Remove --prototype and --pretrained from reference scripts

* Remove pretrained_backbone refs

* Corrections and simplifications

* Fixing bug

* Fixing linter

* Fix flake8

* restore documentation example

* minor fixes

* fix optical flow missing param

* Fixing commands

* Adding weights_backbone support in detection and segmentation

* Updating the commands for InceptionV3

* Setting `weights_backbone` to its fully BC value (#5653)

* Replace default `weights_backbone=None` with its BC values.

* Fixing tests

* Fix linter

* Update docs.

* Update preprocessing on reference scripts.

* Change qat/ptq to their full values.

* Refactoring preprocessing

* Fix video preset

* No initialization on VGG if pretrained

* Fix warning messages for backbone utils.

* Adding star to all preset constructors.

* Fix mypy.
Co-authored-by: Nicolas Hug <contact@nicolas-hug.com>
Co-authored-by: Philip Meier <github.pmeier@posteo.de>
Co-authored-by: Anton Thomma <11010310+thommaa@users.noreply.github.com>
Co-authored-by: Anton Thomma <anton@pri.co.nz>
parent 375e4ab2
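
In caller terms, the port replaces the boolean `pretrained` flag with per-model weight enums. A minimal sketch of the migration, using the DenseNet builder that the first diff below introduces (the enum and builder names come from that diff):

from torchvision.models import densenet121, DenseNet121_Weights

# Old style, still accepted through @handle_legacy_interface but deprecated:
#   model = densenet121(pretrained=True)

# New multi-weight style:
weights = DenseNet121_Weights.IMAGENET1K_V1  # or DenseNet121_Weights.DEFAULT
model = densenet121(weights=weights)
model.eval()

# Each Weights entry bundles its own inference preset and metadata.
preprocess = weights.transforms()
categories = weights.meta["categories"]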
torchvision/models/densenet.py

 import re
 from collections import OrderedDict
-from typing import Any, List, Tuple
+from functools import partial
+from typing import Any, List, Optional, Tuple

 import torch
 import torch.nn as nn
@@ -8,18 +9,24 @@ import torch.nn.functional as F
 import torch.utils.checkpoint as cp
 from torch import Tensor

-from .._internally_replaced_utils import load_state_dict_from_url
+from ..transforms._presets import ImageClassification, InterpolationMode
 from ..utils import _log_api_usage_once
+from ._api import WeightsEnum, Weights
+from ._meta import _IMAGENET_CATEGORIES
+from ._utils import handle_legacy_interface, _ovewrite_named_param

-__all__ = ["DenseNet", "densenet121", "densenet169", "densenet201", "densenet161"]
-
-model_urls = {
-    "densenet121": "https://download.pytorch.org/models/densenet121-a639ec97.pth",
-    "densenet169": "https://download.pytorch.org/models/densenet169-b2777c0a.pth",
-    "densenet201": "https://download.pytorch.org/models/densenet201-c1103571.pth",
-    "densenet161": "https://download.pytorch.org/models/densenet161-8d451a50.pth",
-}
+__all__ = [
+    "DenseNet",
+    "DenseNet121_Weights",
+    "DenseNet161_Weights",
+    "DenseNet169_Weights",
+    "DenseNet201_Weights",
+    "densenet121",
+    "densenet161",
+    "densenet169",
+    "densenet201",
+]

 class _DenseLayer(nn.Module):
@@ -220,7 +227,7 @@ class DenseNet(nn.Module):
         return out

-def _load_state_dict(model: nn.Module, model_url: str, progress: bool) -> None:
+def _load_state_dict(model: nn.Module, weights: WeightsEnum, progress: bool) -> None:
     # '.'s are no longer allowed in module names, but previous _DenseLayer
     # has keys 'norm.1', 'relu.1', 'conv.1', 'norm.2', 'relu.2', 'conv.2'.
     # They are also in the checkpoints in model_urls. This pattern is used
@@ -229,7 +236,7 @@ def _load_state_dict(model: nn.Module, model_url: str, progress: bool) -> None:
         r"^(.*denselayer\d+\.(?:norm|relu|conv))\.((?:[12])\.(?:weight|bias|running_mean|running_var))$"
     )

-    state_dict = load_state_dict_from_url(model_url, progress=progress)
+    state_dict = weights.get_state_dict(progress=progress)
     for key in list(state_dict.keys()):
         res = pattern.match(key)
         if res:
@@ -240,71 +247,155 @@ def _load_state_dict(model: nn.Module, model_url: str, progress: bool) -> None:

 def _densenet(
-    arch: str,
     growth_rate: int,
     block_config: Tuple[int, int, int, int],
     num_init_features: int,
-    pretrained: bool,
+    weights: Optional[WeightsEnum],
     progress: bool,
     **kwargs: Any,
 ) -> DenseNet:
+    if weights is not None:
+        _ovewrite_named_param(kwargs, "num_classes", len(weights.meta["categories"]))
+
     model = DenseNet(growth_rate, block_config, num_init_features, **kwargs)
-    if pretrained:
-        _load_state_dict(model, model_urls[arch], progress)
+
+    if weights is not None:
+        _load_state_dict(model=model, weights=weights, progress=progress)
+
     return model

-def densenet121(pretrained: bool = False, progress: bool = True, **kwargs: Any) -> DenseNet:
+_COMMON_META = {
+    "task": "image_classification",
+    "architecture": "DenseNet",
+    "publication_year": 2016,
+    "size": (224, 224),
+    "min_size": (29, 29),
+    "categories": _IMAGENET_CATEGORIES,
+    "interpolation": InterpolationMode.BILINEAR,
+    "recipe": "https://github.com/pytorch/vision/pull/116",
+}
+
+
+class DenseNet121_Weights(WeightsEnum):
+    IMAGENET1K_V1 = Weights(
+        url="https://download.pytorch.org/models/densenet121-a639ec97.pth",
+        transforms=partial(ImageClassification, crop_size=224),
+        meta={
+            **_COMMON_META,
+            "num_params": 7978856,
+            "acc@1": 74.434,
+            "acc@5": 91.972,
+        },
+    )
+    DEFAULT = IMAGENET1K_V1
+
+
+class DenseNet161_Weights(WeightsEnum):
+    IMAGENET1K_V1 = Weights(
+        url="https://download.pytorch.org/models/densenet161-8d451a50.pth",
+        transforms=partial(ImageClassification, crop_size=224),
+        meta={
+            **_COMMON_META,
+            "num_params": 28681000,
+            "acc@1": 77.138,
+            "acc@5": 93.560,
+        },
+    )
+    DEFAULT = IMAGENET1K_V1
+
+
+class DenseNet169_Weights(WeightsEnum):
+    IMAGENET1K_V1 = Weights(
+        url="https://download.pytorch.org/models/densenet169-b2777c0a.pth",
+        transforms=partial(ImageClassification, crop_size=224),
+        meta={
+            **_COMMON_META,
+            "num_params": 14149480,
+            "acc@1": 75.600,
+            "acc@5": 92.806,
+        },
+    )
+    DEFAULT = IMAGENET1K_V1
+
+
+class DenseNet201_Weights(WeightsEnum):
+    IMAGENET1K_V1 = Weights(
+        url="https://download.pytorch.org/models/densenet201-c1103571.pth",
+        transforms=partial(ImageClassification, crop_size=224),
+        meta={
+            **_COMMON_META,
+            "num_params": 20013928,
+            "acc@1": 76.896,
+            "acc@5": 93.370,
+        },
+    )
+    DEFAULT = IMAGENET1K_V1
+
+
+@handle_legacy_interface(weights=("pretrained", DenseNet121_Weights.IMAGENET1K_V1))
+def densenet121(*, weights: Optional[DenseNet121_Weights] = None, progress: bool = True, **kwargs: Any) -> DenseNet:
     r"""Densenet-121 model from
     `"Densely Connected Convolutional Networks" <https://arxiv.org/pdf/1608.06993.pdf>`_.
     The required minimum input size of the model is 29x29.

     Args:
-        pretrained (bool): If True, returns a model pre-trained on ImageNet
+        weights (DenseNet121_Weights, optional): The pretrained weights for the model
         progress (bool): If True, displays a progress bar of the download to stderr
         memory_efficient (bool) - If True, uses checkpointing. Much more memory efficient,
           but slower. Default: *False*. See `"paper" <https://arxiv.org/pdf/1707.06990.pdf>`_.
     """
-    return _densenet("densenet121", 32, (6, 12, 24, 16), 64, pretrained, progress, **kwargs)
+    weights = DenseNet121_Weights.verify(weights)
+
+    return _densenet(32, (6, 12, 24, 16), 64, weights, progress, **kwargs)

-def densenet161(pretrained: bool = False, progress: bool = True, **kwargs: Any) -> DenseNet:
+@handle_legacy_interface(weights=("pretrained", DenseNet161_Weights.IMAGENET1K_V1))
+def densenet161(*, weights: Optional[DenseNet161_Weights] = None, progress: bool = True, **kwargs: Any) -> DenseNet:
     r"""Densenet-161 model from
     `"Densely Connected Convolutional Networks" <https://arxiv.org/pdf/1608.06993.pdf>`_.
     The required minimum input size of the model is 29x29.

     Args:
-        pretrained (bool): If True, returns a model pre-trained on ImageNet
+        weights (DenseNet161_Weights, optional): The pretrained weights for the model
         progress (bool): If True, displays a progress bar of the download to stderr
         memory_efficient (bool) - If True, uses checkpointing. Much more memory efficient,
           but slower. Default: *False*. See `"paper" <https://arxiv.org/pdf/1707.06990.pdf>`_.
     """
-    return _densenet("densenet161", 48, (6, 12, 36, 24), 96, pretrained, progress, **kwargs)
+    weights = DenseNet161_Weights.verify(weights)
+
+    return _densenet(48, (6, 12, 36, 24), 96, weights, progress, **kwargs)

-def densenet169(pretrained: bool = False, progress: bool = True, **kwargs: Any) -> DenseNet:
+@handle_legacy_interface(weights=("pretrained", DenseNet169_Weights.IMAGENET1K_V1))
+def densenet169(*, weights: Optional[DenseNet169_Weights] = None, progress: bool = True, **kwargs: Any) -> DenseNet:
     r"""Densenet-169 model from
     `"Densely Connected Convolutional Networks" <https://arxiv.org/pdf/1608.06993.pdf>`_.
     The required minimum input size of the model is 29x29.

     Args:
-        pretrained (bool): If True, returns a model pre-trained on ImageNet
+        weights (DenseNet169_Weights, optional): The pretrained weights for the model
         progress (bool): If True, displays a progress bar of the download to stderr
         memory_efficient (bool) - If True, uses checkpointing. Much more memory efficient,
           but slower. Default: *False*. See `"paper" <https://arxiv.org/pdf/1707.06990.pdf>`_.
     """
-    return _densenet("densenet169", 32, (6, 12, 32, 32), 64, pretrained, progress, **kwargs)
+    weights = DenseNet169_Weights.verify(weights)
+
+    return _densenet(32, (6, 12, 32, 32), 64, weights, progress, **kwargs)

-def densenet201(pretrained: bool = False, progress: bool = True, **kwargs: Any) -> DenseNet:
+@handle_legacy_interface(weights=("pretrained", DenseNet201_Weights.IMAGENET1K_V1))
+def densenet201(*, weights: Optional[DenseNet201_Weights] = None, progress: bool = True, **kwargs: Any) -> DenseNet:
     r"""Densenet-201 model from
     `"Densely Connected Convolutional Networks" <https://arxiv.org/pdf/1608.06993.pdf>`_.
     The required minimum input size of the model is 29x29.

     Args:
-        pretrained (bool): If True, returns a model pre-trained on ImageNet
+        weights (DenseNet201_Weights, optional): The pretrained weights for the model
         progress (bool): If True, displays a progress bar of the download to stderr
         memory_efficient (bool) - If True, uses checkpointing. Much more memory efficient,
           but slower. Default: *False*. See `"paper" <https://arxiv.org/pdf/1707.06990.pdf>`_.
     """
-    return _densenet("densenet201", 32, (6, 12, 48, 32), 64, pretrained, progress, **kwargs)
+    weights = DenseNet201_Weights.verify(weights)
+
+    return _densenet(32, (6, 12, 48, 32), 64, weights, progress, **kwargs)
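
A note on the `_ovewrite_named_param` call in `_densenet` above: when weights are requested, `num_classes` is pinned to the length of the weights' category list. Assuming that helper keeps its raise-on-conflict behavior, callers get:

from torchvision.models import densenet121, DenseNet121_Weights

# num_classes is inferred (1000) from the weights' metadata:
model = densenet121(weights=DenseNet121_Weights.IMAGENET1K_V1)

# No weights, so any randomly initialized head size is allowed:
model = densenet121(weights=None, num_classes=10)

# Conflicts with the 1000 bundled categories, so this should raise ValueError:
try:
    densenet121(weights=DenseNet121_Weights.IMAGENET1K_V1, num_classes=10)
except ValueError as err:
    print(err)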
torchvision/models/detection/__init__.py

 from .faster_rcnn import *
-from .mask_rcnn import *
+from .fcos import *
 from .keypoint_rcnn import *
+from .mask_rcnn import *
 from .retinanet import *
 from .ssd import *
 from .ssdlite import *
-from .fcos import *
torchvision/models/detection/backbone_utils.py

@@ -6,7 +6,8 @@ from torchvision.ops import misc as misc_nn_ops
 from torchvision.ops.feature_pyramid_network import ExtraFPNBlock, FeaturePyramidNetwork, LastLevelMaxPool

 from .. import mobilenet, resnet
-from .._utils import IntermediateLayerGetter
+from .._api import WeightsEnum, get_enum_from_fn
+from .._utils import IntermediateLayerGetter, handle_legacy_interface

 class BackboneWithFPN(nn.Module):
@@ -55,9 +56,16 @@ class BackboneWithFPN(nn.Module):
         return x

+@handle_legacy_interface(
+    weights=(
+        "pretrained",
+        lambda kwargs: get_enum_from_fn(resnet.__dict__[kwargs["backbone_name"]]).from_str("IMAGENET1K_V1"),
+    ),
+)
 def resnet_fpn_backbone(
+    *,
     backbone_name: str,
-    pretrained: bool,
+    weights: Optional[WeightsEnum],
     norm_layer: Callable[..., nn.Module] = misc_nn_ops.FrozenBatchNorm2d,
     trainable_layers: int = 3,
     returned_layers: Optional[List[int]] = None,
@@ -69,7 +77,7 @@ def resnet_fpn_backbone(
     Examples::

         >>> from torchvision.models.detection.backbone_utils import resnet_fpn_backbone
-        >>> backbone = resnet_fpn_backbone('resnet50', pretrained=True, trainable_layers=3)
+        >>> backbone = resnet_fpn_backbone('resnet50', weights=ResNet50_Weights.DEFAULT, trainable_layers=3)
         >>> # get some dummy image
         >>> x = torch.rand(1,3,64,64)
         >>> # compute the output
@@ -85,10 +93,10 @@ def resnet_fpn_backbone(
     Args:
         backbone_name (string): resnet architecture. Possible values are 'resnet18', 'resnet34', 'resnet50',
             'resnet101', 'resnet152', 'resnext50_32x4d', 'resnext101_32x8d', 'wide_resnet50_2', 'wide_resnet101_2'
-        pretrained (bool): If True, returns a model with backbone pre-trained on Imagenet
+        weights (WeightsEnum, optional): The pretrained weights for the model
         norm_layer (callable): it is recommended to use the default value. For details visit:
             (https://github.com/facebookresearch/maskrcnn-benchmark/issues/267)
-        trainable_layers (int): number of trainable (not frozen) resnet layers starting from final block.
+        trainable_layers (int): number of trainable (not frozen) layers starting from final block.
             Valid values are between 0 and 5, with 5 meaning all backbone layers are trainable.
         returned_layers (list of int): The layers of the network to return. Each entry must be in ``[1, 4]``.
             By default all layers are returned.
@@ -98,7 +106,7 @@ def resnet_fpn_backbone(
             a new list of feature maps and their corresponding names. By
             default a ``LastLevelMaxPool`` is used.
     """
-    backbone = resnet.__dict__[backbone_name](pretrained=pretrained, norm_layer=norm_layer)
+    backbone = resnet.__dict__[backbone_name](weights=weights, norm_layer=norm_layer)
     return _resnet_fpn_extractor(backbone, trainable_layers, returned_layers, extra_blocks)

@@ -135,13 +143,13 @@ def _resnet_fpn_extractor(

 def _validate_trainable_layers(
-    pretrained: bool,
+    is_trained: bool,
     trainable_backbone_layers: Optional[int],
     max_value: int,
     default_value: int,
 ) -> int:
     # don't freeze any layers if pretrained model or backbone is not used
-    if not pretrained:
+    if not is_trained:
         if trainable_backbone_layers is not None:
             warnings.warn(
                 "Changing trainable_backbone_layers has not effect if "
@@ -160,16 +168,23 @@ def _validate_trainable_layers(
     return trainable_backbone_layers

+@handle_legacy_interface(
+    weights=(
+        "pretrained",
+        lambda kwargs: get_enum_from_fn(mobilenet.__dict__[kwargs["backbone_name"]]).from_str("IMAGENET1K_V1"),
+    ),
+)
 def mobilenet_backbone(
+    *,
     backbone_name: str,
-    pretrained: bool,
+    weights: Optional[WeightsEnum],
     fpn: bool,
     norm_layer: Callable[..., nn.Module] = misc_nn_ops.FrozenBatchNorm2d,
     trainable_layers: int = 2,
     returned_layers: Optional[List[int]] = None,
     extra_blocks: Optional[ExtraFPNBlock] = None,
 ) -> nn.Module:
-    backbone = mobilenet.__dict__[backbone_name](pretrained=pretrained, norm_layer=norm_layer)
+    backbone = mobilenet.__dict__[backbone_name](weights=weights, norm_layer=norm_layer)
     return _mobilenet_extractor(backbone, fpn, trainable_layers, returned_layers, extra_blocks)
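
For the backbone builders the legacy adapter cannot point at one fixed enum value, because the right enum depends on `backbone_name`; hence the lambdas that resolve it through `get_enum_from_fn`. Under the new keyword-only signature, usage looks like this (a sketch consistent with the updated docstring example):

import torch
from torchvision.models import ResNet50_Weights
from torchvision.models.detection.backbone_utils import resnet_fpn_backbone

backbone = resnet_fpn_backbone(
    backbone_name="resnet50",
    weights=ResNet50_Weights.IMAGENET1K_V1,
    trainable_layers=3,
)
# The FPN returns an ordered dict of feature maps, one per returned layer.
out = backbone(torch.rand(1, 3, 64, 64))
print([(name, feature.shape) for name, feature in out.items()])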
torchvision/models/detection/faster_rcnn.py

+from typing import Any, Optional, Union
+
 import torch.nn.functional as F
 from torch import nn
 from torchvision.ops import MultiScaleRoIAlign

-from ..._internally_replaced_utils import load_state_dict_from_url
 from ...ops import misc as misc_nn_ops
-from ..mobilenetv3 import mobilenet_v3_large
-from ..resnet import resnet50
+from ...transforms._presets import ObjectDetection, InterpolationMode
+from .._api import WeightsEnum, Weights
+from .._meta import _COCO_CATEGORIES
+from .._utils import handle_legacy_interface, _ovewrite_value_param
+from ..mobilenetv3 import MobileNet_V3_Large_Weights, mobilenet_v3_large
+from ..resnet import ResNet50_Weights, resnet50
 from ._utils import overwrite_eps
 from .anchor_utils import AnchorGenerator
 from .backbone_utils import _resnet_fpn_extractor, _validate_trainable_layers, _mobilenet_extractor
@@ -17,9 +22,12 @@ from .transform import GeneralizedRCNNTransform

 __all__ = [
     "FasterRCNN",
+    "FasterRCNN_ResNet50_FPN_Weights",
+    "FasterRCNN_MobileNet_V3_Large_FPN_Weights",
+    "FasterRCNN_MobileNet_V3_Large_320_FPN_Weights",
     "fasterrcnn_resnet50_fpn",
-    "fasterrcnn_mobilenet_v3_large_320_fpn",
     "fasterrcnn_mobilenet_v3_large_fpn",
+    "fasterrcnn_mobilenet_v3_large_320_fpn",
 ]
@@ -109,7 +117,7 @@ class FasterRCNN(GeneralizedRCNN):
         >>> from torchvision.models.detection.rpn import AnchorGenerator
         >>> # load a pre-trained model for classification and return
         >>> # only the features
-        >>> backbone = torchvision.models.mobilenet_v2(pretrained=True).features
+        >>> backbone = torchvision.models.mobilenet_v2(weights=MobileNet_V2_Weights.DEFAULT).features
         >>> # FasterRCNN needs to know the number of
         >>> # output channels in a backbone. For mobilenet_v2, it's 1280
         >>> # so we need to add it here
@@ -316,16 +324,70 @@ class FastRCNNPredictor(nn.Module):
         return scores, bbox_deltas

-model_urls = {
-    "fasterrcnn_resnet50_fpn_coco": "https://download.pytorch.org/models/fasterrcnn_resnet50_fpn_coco-258fb6c6.pth",
-    "fasterrcnn_mobilenet_v3_large_320_fpn_coco": "https://download.pytorch.org/models/fasterrcnn_mobilenet_v3_large_320_fpn-907ea3f9.pth",
-    "fasterrcnn_mobilenet_v3_large_fpn_coco": "https://download.pytorch.org/models/fasterrcnn_mobilenet_v3_large_fpn-fb6a3cc7.pth",
+_COMMON_META = {
+    "task": "image_object_detection",
+    "architecture": "FasterRCNN",
+    "publication_year": 2015,
+    "categories": _COCO_CATEGORIES,
+    "interpolation": InterpolationMode.BILINEAR,
 }
+
+
+class FasterRCNN_ResNet50_FPN_Weights(WeightsEnum):
+    COCO_V1 = Weights(
+        url="https://download.pytorch.org/models/fasterrcnn_resnet50_fpn_coco-258fb6c6.pth",
+        transforms=ObjectDetection,
+        meta={
+            **_COMMON_META,
+            "num_params": 41755286,
+            "recipe": "https://github.com/pytorch/vision/tree/main/references/detection#faster-r-cnn-resnet-50-fpn",
+            "map": 37.0,
+        },
+    )
+    DEFAULT = COCO_V1
+
+
+class FasterRCNN_MobileNet_V3_Large_FPN_Weights(WeightsEnum):
+    COCO_V1 = Weights(
+        url="https://download.pytorch.org/models/fasterrcnn_mobilenet_v3_large_fpn-fb6a3cc7.pth",
+        transforms=ObjectDetection,
+        meta={
+            **_COMMON_META,
+            "num_params": 19386354,
+            "recipe": "https://github.com/pytorch/vision/tree/main/references/detection#faster-r-cnn-mobilenetv3-large-fpn",
+            "map": 32.8,
+        },
+    )
+    DEFAULT = COCO_V1
+
+
+class FasterRCNN_MobileNet_V3_Large_320_FPN_Weights(WeightsEnum):
+    COCO_V1 = Weights(
+        url="https://download.pytorch.org/models/fasterrcnn_mobilenet_v3_large_320_fpn-907ea3f9.pth",
+        transforms=ObjectDetection,
+        meta={
+            **_COMMON_META,
+            "num_params": 19386354,
+            "recipe": "https://github.com/pytorch/vision/tree/main/references/detection#faster-r-cnn-mobilenetv3-large-320-fpn",
+            "map": 22.8,
+        },
+    )
+    DEFAULT = COCO_V1
+
+
+@handle_legacy_interface(
+    weights=("pretrained", FasterRCNN_ResNet50_FPN_Weights.COCO_V1),
+    weights_backbone=("pretrained_backbone", ResNet50_Weights.IMAGENET1K_V1),
+)
 def fasterrcnn_resnet50_fpn(
-    pretrained=False, progress=True, num_classes=91, pretrained_backbone=True, trainable_backbone_layers=None, **kwargs
-):
+    *,
+    weights: Optional[FasterRCNN_ResNet50_FPN_Weights] = None,
+    progress: bool = True,
+    num_classes: Optional[int] = None,
+    weights_backbone: Optional[ResNet50_Weights] = ResNet50_Weights.IMAGENET1K_V1,
+    trainable_backbone_layers: Optional[int] = None,
+    **kwargs: Any,
+) -> FasterRCNN:
     """
     Constructs a Faster R-CNN model with a ResNet-50-FPN backbone.
@@ -362,7 +424,7 @@ def fasterrcnn_resnet50_fpn(
     Example::

-        >>> model = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained=True)
+        >>> model = torchvision.models.detection.fasterrcnn_resnet50_fpn(weights=FasterRCNN_ResNet50_FPN_Weights.DEFAULT)
         >>> # For training
         >>> images, boxes = torch.rand(4, 3, 600, 1200), torch.rand(4, 11, 4)
         >>> boxes[:, :, 2:4] = boxes[:, :, 0:2] + boxes[:, :, 2:4]
@@ -384,51 +446,60 @@ def fasterrcnn_resnet50_fpn(
         >>> torch.onnx.export(model, x, "faster_rcnn.onnx", opset_version = 11)

     Args:
-        pretrained (bool): If True, returns a model pre-trained on COCO train2017
+        weights (FasterRCNN_ResNet50_FPN_Weights, optional): The pretrained weights for the model
         progress (bool): If True, displays a progress bar of the download to stderr
-        num_classes (int): number of output classes of the model (including the background)
-        pretrained_backbone (bool): If True, returns a model with backbone pre-trained on Imagenet
-        trainable_backbone_layers (int): number of trainable (not frozen) resnet layers starting from final block.
+        num_classes (int, optional): number of output classes of the model (including the background)
+        weights_backbone (ResNet50_Weights, optional): The pretrained weights for the backbone
+        trainable_backbone_layers (int, optional): number of trainable (not frozen) layers starting from final block.
             Valid values are between 0 and 5, with 5 meaning all backbone layers are trainable. If ``None`` is
             passed (the default) this value is set to 3.
     """
-    is_trained = pretrained or pretrained_backbone
+    weights = FasterRCNN_ResNet50_FPN_Weights.verify(weights)
+    weights_backbone = ResNet50_Weights.verify(weights_backbone)
+
+    if weights is not None:
+        weights_backbone = None
+        num_classes = _ovewrite_value_param(num_classes, len(weights.meta["categories"]))
+    elif num_classes is None:
+        num_classes = 91
+
+    is_trained = weights is not None or weights_backbone is not None
     trainable_backbone_layers = _validate_trainable_layers(is_trained, trainable_backbone_layers, 5, 3)
     norm_layer = misc_nn_ops.FrozenBatchNorm2d if is_trained else nn.BatchNorm2d

-    if pretrained:
-        # no need to download the backbone if pretrained is set
-        pretrained_backbone = False
-
-    backbone = resnet50(pretrained=pretrained_backbone, progress=progress, norm_layer=norm_layer)
+    backbone = resnet50(weights=weights_backbone, progress=progress, norm_layer=norm_layer)
     backbone = _resnet_fpn_extractor(backbone, trainable_backbone_layers)
-    model = FasterRCNN(backbone, num_classes, **kwargs)
-    if pretrained:
-        state_dict = load_state_dict_from_url(model_urls["fasterrcnn_resnet50_fpn_coco"], progress=progress)
-        model.load_state_dict(state_dict)
-        overwrite_eps(model, 0.0)
+    model = FasterRCNN(backbone, num_classes=num_classes, **kwargs)
+
+    if weights is not None:
+        model.load_state_dict(weights.get_state_dict(progress=progress))
+        if weights == FasterRCNN_ResNet50_FPN_Weights.COCO_V1:
+            overwrite_eps(model, 0.0)
+
     return model

 def _fasterrcnn_mobilenet_v3_large_fpn(
-    weights_name,
-    pretrained=False,
-    progress=True,
-    num_classes=91,
-    pretrained_backbone=True,
-    trainable_backbone_layers=None,
-    **kwargs,
-):
-    is_trained = pretrained or pretrained_backbone
+    *,
+    weights: Optional[Union[FasterRCNN_MobileNet_V3_Large_FPN_Weights, FasterRCNN_MobileNet_V3_Large_320_FPN_Weights]],
+    progress: bool,
+    num_classes: Optional[int],
+    weights_backbone: Optional[MobileNet_V3_Large_Weights],
+    trainable_backbone_layers: Optional[int],
+    **kwargs: Any,
+) -> FasterRCNN:
+    if weights is not None:
+        weights_backbone = None
+        num_classes = _ovewrite_value_param(num_classes, len(weights.meta["categories"]))
+    elif num_classes is None:
+        num_classes = 91
+
+    is_trained = weights is not None or weights_backbone is not None
     trainable_backbone_layers = _validate_trainable_layers(is_trained, trainable_backbone_layers, 6, 3)
     norm_layer = misc_nn_ops.FrozenBatchNorm2d if is_trained else nn.BatchNorm2d

-    if pretrained:
-        pretrained_backbone = False
-
-    backbone = mobilenet_v3_large(pretrained=pretrained_backbone, progress=progress, norm_layer=norm_layer)
+    backbone = mobilenet_v3_large(weights=weights_backbone, progress=progress, norm_layer=norm_layer)
     backbone = _mobilenet_extractor(backbone, True, trainable_backbone_layers)
+
     anchor_sizes = (
         (
             32,
@@ -439,21 +510,29 @@ def _fasterrcnn_mobilenet_v3_large_fpn(
         ),
     ) * 3
     aspect_ratios = ((0.5, 1.0, 2.0),) * len(anchor_sizes)
+
     model = FasterRCNN(
         backbone, num_classes, rpn_anchor_generator=AnchorGenerator(anchor_sizes, aspect_ratios), **kwargs
     )
-    if pretrained:
-        if model_urls.get(weights_name, None) is None:
-            raise ValueError(f"No checkpoint is available for model {weights_name}")
-        state_dict = load_state_dict_from_url(model_urls[weights_name], progress=progress)
-        model.load_state_dict(state_dict)
+
+    if weights is not None:
+        model.load_state_dict(weights.get_state_dict(progress=progress))
+
     return model

+@handle_legacy_interface(
+    weights=("pretrained", FasterRCNN_MobileNet_V3_Large_320_FPN_Weights.COCO_V1),
+    weights_backbone=("pretrained_backbone", MobileNet_V3_Large_Weights.IMAGENET1K_V1),
+)
 def fasterrcnn_mobilenet_v3_large_320_fpn(
-    pretrained=False, progress=True, num_classes=91, pretrained_backbone=True, trainable_backbone_layers=None, **kwargs
-):
+    *,
+    weights: Optional[FasterRCNN_MobileNet_V3_Large_320_FPN_Weights] = None,
+    progress: bool = True,
+    num_classes: Optional[int] = None,
+    weights_backbone: Optional[MobileNet_V3_Large_Weights] = MobileNet_V3_Large_Weights.IMAGENET1K_V1,
+    trainable_backbone_layers: Optional[int] = None,
+    **kwargs: Any,
+) -> FasterRCNN:
     """
     Constructs a low resolution Faster R-CNN model with a MobileNetV3-Large FPN backbone tunned for mobile use-cases.
     It works similarly to Faster R-CNN with ResNet-50 FPN backbone. See
@@ -462,21 +541,23 @@ def fasterrcnn_mobilenet_v3_large_320_fpn(
     Example::

-        >>> model = torchvision.models.detection.fasterrcnn_mobilenet_v3_large_320_fpn(pretrained=True)
+        >>> model = torchvision.models.detection.fasterrcnn_mobilenet_v3_large_320_fpn(weights=FasterRCNN_MobileNet_V3_Large_320_FPN_Weights.DEFAULT)
         >>> model.eval()
         >>> x = [torch.rand(3, 300, 400), torch.rand(3, 500, 400)]
         >>> predictions = model(x)

     Args:
-        pretrained (bool): If True, returns a model pre-trained on COCO train2017
+        weights (FasterRCNN_MobileNet_V3_Large_320_FPN_Weights, optional): The pretrained weights for the model
         progress (bool): If True, displays a progress bar of the download to stderr
-        num_classes (int): number of output classes of the model (including the background)
-        pretrained_backbone (bool): If True, returns a model with backbone pre-trained on Imagenet
-        trainable_backbone_layers (int): number of trainable (not frozen) resnet layers starting from final block.
+        num_classes (int, optional): number of output classes of the model (including the background)
+        weights_backbone (MobileNet_V3_Large_Weights, optional): The pretrained weights for the backbone
+        trainable_backbone_layers (int, optional): number of trainable (not frozen) layers starting from final block.
             Valid values are between 0 and 6, with 6 meaning all backbone layers are trainable. If ``None`` is
             passed (the default) this value is set to 3.
     """
-    weights_name = "fasterrcnn_mobilenet_v3_large_320_fpn_coco"
+    weights = FasterRCNN_MobileNet_V3_Large_320_FPN_Weights.verify(weights)
+    weights_backbone = MobileNet_V3_Large_Weights.verify(weights_backbone)
+
     defaults = {
         "min_size": 320,
         "max_size": 640,
@@ -487,19 +568,28 @@ def fasterrcnn_mobilenet_v3_large_320_fpn(

     kwargs = {**defaults, **kwargs}
     return _fasterrcnn_mobilenet_v3_large_fpn(
-        weights_name,
-        pretrained=pretrained,
+        weights=weights,
         progress=progress,
         num_classes=num_classes,
-        pretrained_backbone=pretrained_backbone,
+        weights_backbone=weights_backbone,
         trainable_backbone_layers=trainable_backbone_layers,
         **kwargs,
     )

+@handle_legacy_interface(
+    weights=("pretrained", FasterRCNN_MobileNet_V3_Large_FPN_Weights.COCO_V1),
+    weights_backbone=("pretrained_backbone", MobileNet_V3_Large_Weights.IMAGENET1K_V1),
+)
 def fasterrcnn_mobilenet_v3_large_fpn(
-    pretrained=False, progress=True, num_classes=91, pretrained_backbone=True, trainable_backbone_layers=None, **kwargs
-):
+    *,
+    weights: Optional[FasterRCNN_MobileNet_V3_Large_FPN_Weights] = None,
+    progress: bool = True,
+    num_classes: Optional[int] = None,
+    weights_backbone: Optional[MobileNet_V3_Large_Weights] = MobileNet_V3_Large_Weights.IMAGENET1K_V1,
+    trainable_backbone_layers: Optional[int] = None,
+    **kwargs: Any,
+) -> FasterRCNN:
     """
     Constructs a high resolution Faster R-CNN model with a MobileNetV3-Large FPN backbone.
     It works similarly to Faster R-CNN with ResNet-50 FPN backbone. See
@@ -508,32 +598,33 @@ def fasterrcnn_mobilenet_v3_large_fpn(
     Example::

-        >>> model = torchvision.models.detection.fasterrcnn_mobilenet_v3_large_fpn(pretrained=True)
+        >>> model = torchvision.models.detection.fasterrcnn_mobilenet_v3_large_fpn(weights=FasterRCNN_MobileNet_V3_Large_FPN_Weights.DEFAULT)
         >>> model.eval()
         >>> x = [torch.rand(3, 300, 400), torch.rand(3, 500, 400)]
         >>> predictions = model(x)

     Args:
-        pretrained (bool): If True, returns a model pre-trained on COCO train2017
+        weights (FasterRCNN_MobileNet_V3_Large_FPN_Weights, optional): The pretrained weights for the model
         progress (bool): If True, displays a progress bar of the download to stderr
-        num_classes (int): number of output classes of the model (including the background)
-        pretrained_backbone (bool): If True, returns a model with backbone pre-trained on Imagenet
-        trainable_backbone_layers (int): number of trainable (not frozen) resnet layers starting from final block.
+        num_classes (int, optional): number of output classes of the model (including the background)
+        weights_backbone (MobileNet_V3_Large_Weights, optional): The pretrained weights for the backbone
+        trainable_backbone_layers (int, optional): number of trainable (not frozen) layers starting from final block.
             Valid values are between 0 and 6, with 6 meaning all backbone layers are trainable. If ``None`` is
             passed (the default) this value is set to 3.
     """
-    weights_name = "fasterrcnn_mobilenet_v3_large_fpn_coco"
+    weights = FasterRCNN_MobileNet_V3_Large_FPN_Weights.verify(weights)
+    weights_backbone = MobileNet_V3_Large_Weights.verify(weights_backbone)
+
     defaults = {
         "rpn_score_thresh": 0.05,
     }

     kwargs = {**defaults, **kwargs}
     return _fasterrcnn_mobilenet_v3_large_fpn(
-        weights_name,
-        pretrained=pretrained,
+        weights=weights,
         progress=progress,
         num_classes=num_classes,
-        pretrained_backbone=pretrained_backbone,
+        weights_backbone=weights_backbone,
         trainable_backbone_layers=trainable_backbone_layers,
         **kwargs,
     )
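
The precedence rules encoded in `fasterrcnn_resnet50_fpn` above, restated as calls (this follows directly from the `if weights is not None` block):

from torchvision.models import ResNet50_Weights
from torchvision.models.detection import (
    FasterRCNN_ResNet50_FPN_Weights,
    fasterrcnn_resnet50_fpn,
)

# Full COCO detector: `weights` wins, `weights_backbone` is dropped, and
# num_classes is forced to the 91 COCO categories.
model = fasterrcnn_resnet50_fpn(weights=FasterRCNN_ResNet50_FPN_Weights.COCO_V1)

# Fine-tuning setup: no detector weights, ImageNet backbone, custom head.
model = fasterrcnn_resnet50_fpn(
    weights=None,
    weights_backbone=ResNet50_Weights.IMAGENET1K_V1,
    num_classes=5,
)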
torchvision/models/detection/fcos.py

@@ -2,25 +2,32 @@ import math
 import warnings
 from collections import OrderedDict
 from functools import partial
-from typing import Callable, Dict, List, Tuple, Optional
+from typing import Any, Callable, Dict, List, Tuple, Optional

 import torch
 from torch import nn, Tensor

-from ..._internally_replaced_utils import load_state_dict_from_url
 from ...ops import sigmoid_focal_loss, generalized_box_iou_loss
 from ...ops import boxes as box_ops
 from ...ops import misc as misc_nn_ops
 from ...ops.feature_pyramid_network import LastLevelP6P7
+from ...transforms._presets import ObjectDetection, InterpolationMode
 from ...utils import _log_api_usage_once
-from ..resnet import resnet50
+from .._api import WeightsEnum, Weights
+from .._meta import _COCO_CATEGORIES
+from .._utils import handle_legacy_interface, _ovewrite_value_param
+from ..resnet import ResNet50_Weights, resnet50
 from . import _utils as det_utils
 from .anchor_utils import AnchorGenerator
 from .backbone_utils import _resnet_fpn_extractor, _validate_trainable_layers
 from .transform import GeneralizedRCNNTransform

-__all__ = ["FCOS", "fcos_resnet50_fpn"]
+__all__ = [
+    "FCOS",
+    "FCOS_ResNet50_FPN_Weights",
+    "fcos_resnet50_fpn",
+]

 class FCOSHead(nn.Module):
@@ -318,7 +325,7 @@ class FCOS(nn.Module):
         >>> from torchvision.models.detection.anchor_utils import AnchorGenerator
         >>> # load a pre-trained model for classification and return
         >>> # only the features
-        >>> backbone = torchvision.models.mobilenet_v2(pretrained=True).features
+        >>> backbone = torchvision.models.mobilenet_v2(weights=MobileNet_V2_Weights.DEFAULT).features
         >>> # FCOS needs to know the number of
         >>> # output channels in a backbone. For mobilenet_v2, it's 1280
         >>> # so we need to add it here
@@ -636,19 +643,37 @@ class FCOS(nn.Module):
         return self.eager_outputs(losses, detections)

-model_urls = {
-    "fcos_resnet50_fpn_coco": "https://download.pytorch.org/models/fcos_resnet50_fpn_coco-99b0c9b7.pth",
-}
+class FCOS_ResNet50_FPN_Weights(WeightsEnum):
+    COCO_V1 = Weights(
+        url="https://download.pytorch.org/models/fcos_resnet50_fpn_coco-99b0c9b7.pth",
+        transforms=ObjectDetection,
+        meta={
+            "task": "image_object_detection",
+            "architecture": "FCOS",
+            "publication_year": 2019,
+            "num_params": 32269600,
+            "categories": _COCO_CATEGORIES,
+            "interpolation": InterpolationMode.BILINEAR,
+            "recipe": "https://github.com/pytorch/vision/tree/main/references/detection#fcos-resnet-50-fpn",
+            "map": 39.2,
+        },
+    )
+    DEFAULT = COCO_V1

+@handle_legacy_interface(
+    weights=("pretrained", FCOS_ResNet50_FPN_Weights.COCO_V1),
+    weights_backbone=("pretrained_backbone", ResNet50_Weights.IMAGENET1K_V1),
+)
 def fcos_resnet50_fpn(
-    pretrained: bool = False,
+    *,
+    weights: Optional[FCOS_ResNet50_FPN_Weights] = None,
     progress: bool = True,
-    num_classes: int = 91,
-    pretrained_backbone: bool = True,
+    num_classes: Optional[int] = None,
+    weights_backbone: Optional[ResNet50_Weights] = ResNet50_Weights.IMAGENET1K_V1,
     trainable_backbone_layers: Optional[int] = None,
-    **kwargs,
-):
+    **kwargs: Any,
+) -> FCOS:
     """
     Constructs a FCOS model with a ResNet-50-FPN backbone.
@@ -682,34 +707,40 @@ def fcos_resnet50_fpn(
     Example:

-        >>> model = torchvision.models.detection.fcos_resnet50_fpn(pretrained=True)
+        >>> model = torchvision.models.detection.fcos_resnet50_fpn(weights=FCOS_ResNet50_FPN_Weights.DEFAULT)
         >>> model.eval()
         >>> x = [torch.rand(3, 300, 400), torch.rand(3, 500, 400)]
         >>> predictions = model(x)

     Args:
-        pretrained (bool): If True, returns a model pre-trained on COCO train2017
+        weights (FCOS_ResNet50_FPN_Weights, optional): The pretrained weights for the model
         progress (bool): If True, displays a progress bar of the download to stderr
-        num_classes (int): number of output classes of the model (including the background)
-        pretrained_backbone (bool): If True, returns a model with backbone pre-trained on Imagenet
+        num_classes (int, optional): number of output classes of the model (including the background)
+        weights_backbone (ResNet50_Weights, optional): The pretrained weights for the backbone
         trainable_backbone_layers (int, optional): number of trainable (not frozen) resnet layers starting
             from final block. Valid values are between 0 and 5, with 5 meaning all backbone layers are
             trainable. If ``None`` is passed (the default) this value is set to 3. Default: None
     """
-    is_trained = pretrained or pretrained_backbone
+    weights = FCOS_ResNet50_FPN_Weights.verify(weights)
+    weights_backbone = ResNet50_Weights.verify(weights_backbone)
+
+    if weights is not None:
+        weights_backbone = None
+        num_classes = _ovewrite_value_param(num_classes, len(weights.meta["categories"]))
+    elif num_classes is None:
+        num_classes = 91
+
+    is_trained = weights is not None or weights_backbone is not None
     trainable_backbone_layers = _validate_trainable_layers(is_trained, trainable_backbone_layers, 5, 3)
     norm_layer = misc_nn_ops.FrozenBatchNorm2d if is_trained else nn.BatchNorm2d

-    if pretrained:
-        # no need to download the backbone if pretrained is set
-        pretrained_backbone = False
-
-    backbone = resnet50(pretrained=pretrained_backbone, progress=progress, norm_layer=norm_layer)
+    backbone = resnet50(weights=weights_backbone, progress=progress, norm_layer=norm_layer)
     backbone = _resnet_fpn_extractor(
         backbone, trainable_backbone_layers, returned_layers=[2, 3, 4], extra_blocks=LastLevelP6P7(256, 256)
     )
     model = FCOS(backbone, num_classes, **kwargs)
-    if pretrained:
-        state_dict = load_state_dict_from_url(model_urls["fcos_resnet50_fpn_coco"], progress=progress)
-        model.load_state_dict(state_dict)
+
+    if weights is not None:
+        model.load_state_dict(weights.get_state_dict(progress=progress))
+
     return model
torchvision/models/detection/keypoint_rcnn.py

+from typing import Any, Optional
+
 import torch
 from torch import nn
 from torchvision.ops import MultiScaleRoIAlign

-from ..._internally_replaced_utils import load_state_dict_from_url
 from ...ops import misc as misc_nn_ops
-from ..resnet import resnet50
+from ...transforms._presets import ObjectDetection, InterpolationMode
+from .._api import WeightsEnum, Weights
+from .._meta import _COCO_PERSON_CATEGORIES, _COCO_PERSON_KEYPOINT_NAMES
+from .._utils import handle_legacy_interface, _ovewrite_value_param
+from ..resnet import ResNet50_Weights, resnet50
 from ._utils import overwrite_eps
 from .backbone_utils import _resnet_fpn_extractor, _validate_trainable_layers
 from .faster_rcnn import FasterRCNN

-__all__ = ["KeypointRCNN", "keypointrcnn_resnet50_fpn"]
+__all__ = [
+    "KeypointRCNN",
+    "KeypointRCNN_ResNet50_FPN_Weights",
+    "keypointrcnn_resnet50_fpn",
+]

 class KeypointRCNN(FasterRCNN):
@@ -110,7 +119,7 @@ class KeypointRCNN(FasterRCNN):
         >>>
         >>> # load a pre-trained model for classification and return
         >>> # only the features
-        >>> backbone = torchvision.models.mobilenet_v2(pretrained=True).features
+        >>> backbone = torchvision.models.mobilenet_v2(weights=MobileNet_V2_Weights.DEFAULT).features
         >>> # KeypointRCNN needs to know the number of
         >>> # output channels in a backbone. For mobilenet_v2, it's 1280
         >>> # so we need to add it here
@@ -296,22 +305,61 @@ class KeypointRCNNPredictor(nn.Module):
         )

-model_urls = {
-    # legacy model for BC reasons, see https://github.com/pytorch/vision/issues/1606
-    "keypointrcnn_resnet50_fpn_coco_legacy": "https://download.pytorch.org/models/keypointrcnn_resnet50_fpn_coco-9f466800.pth",
-    "keypointrcnn_resnet50_fpn_coco": "https://download.pytorch.org/models/keypointrcnn_resnet50_fpn_coco-fc266e95.pth",
+_COMMON_META = {
+    "task": "image_object_detection",
+    "architecture": "KeypointRCNN",
+    "publication_year": 2017,
+    "categories": _COCO_PERSON_CATEGORIES,
+    "keypoint_names": _COCO_PERSON_KEYPOINT_NAMES,
+    "interpolation": InterpolationMode.BILINEAR,
 }
+
+
+class KeypointRCNN_ResNet50_FPN_Weights(WeightsEnum):
+    COCO_LEGACY = Weights(
+        url="https://download.pytorch.org/models/keypointrcnn_resnet50_fpn_coco-9f466800.pth",
+        transforms=ObjectDetection,
+        meta={
+            **_COMMON_META,
+            "num_params": 59137258,
+            "recipe": "https://github.com/pytorch/vision/issues/1606",
+            "map": 50.6,
+            "map_kp": 61.1,
+        },
+    )
+    COCO_V1 = Weights(
+        url="https://download.pytorch.org/models/keypointrcnn_resnet50_fpn_coco-fc266e95.pth",
+        transforms=ObjectDetection,
+        meta={
+            **_COMMON_META,
+            "num_params": 59137258,
+            "recipe": "https://github.com/pytorch/vision/tree/main/references/detection#keypoint-r-cnn",
+            "map": 54.6,
+            "map_kp": 65.0,
+        },
+    )
+    DEFAULT = COCO_V1

+@handle_legacy_interface(
+    weights=(
+        "pretrained",
+        lambda kwargs: KeypointRCNN_ResNet50_FPN_Weights.COCO_LEGACY
+        if kwargs["pretrained"] == "legacy"
+        else KeypointRCNN_ResNet50_FPN_Weights.COCO_V1,
+    ),
+    weights_backbone=("pretrained_backbone", ResNet50_Weights.IMAGENET1K_V1),
+)
 def keypointrcnn_resnet50_fpn(
-    pretrained=False,
-    progress=True,
-    num_classes=2,
-    num_keypoints=17,
-    pretrained_backbone=True,
-    trainable_backbone_layers=None,
-    **kwargs,
-):
+    *,
+    weights: Optional[KeypointRCNN_ResNet50_FPN_Weights] = None,
+    progress: bool = True,
+    num_classes: Optional[int] = None,
+    num_keypoints: Optional[int] = None,
+    weights_backbone: Optional[ResNet50_Weights] = ResNet50_Weights.IMAGENET1K_V1,
+    trainable_backbone_layers: Optional[int] = None,
+    **kwargs: Any,
+) -> KeypointRCNN:
     """
     Constructs a Keypoint R-CNN model with a ResNet-50-FPN backbone.
@@ -350,7 +398,7 @@ def keypointrcnn_resnet50_fpn(
     Example::

-        >>> model = torchvision.models.detection.keypointrcnn_resnet50_fpn(pretrained=True)
+        >>> model = torchvision.models.detection.keypointrcnn_resnet50_fpn(weights=KeypointRCNN_ResNet50_FPN_Weights.DEFAULT)
         >>> model.eval()
         >>> x = [torch.rand(3, 300, 400), torch.rand(3, 500, 400)]
         >>> predictions = model(x)
@@ -359,31 +407,39 @@ def keypointrcnn_resnet50_fpn(
         >>> torch.onnx.export(model, x, "keypoint_rcnn.onnx", opset_version = 11)

     Args:
-        pretrained (bool): If True, returns a model pre-trained on COCO train2017
+        weights (KeypointRCNN_ResNet50_FPN_Weights, optional): The pretrained weights for the model
         progress (bool): If True, displays a progress bar of the download to stderr
-        num_classes (int): number of output classes of the model (including the background)
-        num_keypoints (int): number of keypoints, default 17
-        pretrained_backbone (bool): If True, returns a model with backbone pre-trained on Imagenet
-        trainable_backbone_layers (int): number of trainable (not frozen) resnet layers starting from final block.
+        num_classes (int, optional): number of output classes of the model (including the background)
+        num_keypoints (int, optional): number of keypoints
+        weights_backbone (ResNet50_Weights, optional): The pretrained weights for the backbone
+        trainable_backbone_layers (int, optional): number of trainable (not frozen) layers starting from final block.
             Valid values are between 0 and 5, with 5 meaning all backbone layers are trainable. If ``None`` is
             passed (the default) this value is set to 3.
     """
-    is_trained = pretrained or pretrained_backbone
+    weights = KeypointRCNN_ResNet50_FPN_Weights.verify(weights)
+    weights_backbone = ResNet50_Weights.verify(weights_backbone)
+
+    if weights is not None:
+        weights_backbone = None
+        num_classes = _ovewrite_value_param(num_classes, len(weights.meta["categories"]))
+        num_keypoints = _ovewrite_value_param(num_keypoints, len(weights.meta["keypoint_names"]))
+    else:
+        if num_classes is None:
+            num_classes = 2
+        if num_keypoints is None:
+            num_keypoints = 17
+
+    is_trained = weights is not None or weights_backbone is not None
     trainable_backbone_layers = _validate_trainable_layers(is_trained, trainable_backbone_layers, 5, 3)
     norm_layer = misc_nn_ops.FrozenBatchNorm2d if is_trained else nn.BatchNorm2d

-    if pretrained:
-        # no need to download the backbone if pretrained is set
-        pretrained_backbone = False
-
-    backbone = resnet50(pretrained=pretrained_backbone, progress=progress, norm_layer=norm_layer)
+    backbone = resnet50(weights=weights_backbone, progress=progress, norm_layer=norm_layer)
     backbone = _resnet_fpn_extractor(backbone, trainable_backbone_layers)
     model = KeypointRCNN(backbone, num_classes, num_keypoints=num_keypoints, **kwargs)
-    if pretrained:
-        key = "keypointrcnn_resnet50_fpn_coco"
-        if pretrained == "legacy":
-            key += "_legacy"
-        state_dict = load_state_dict_from_url(model_urls[key], progress=progress)
-        model.load_state_dict(state_dict)
-        overwrite_eps(model, 0.0)
+
+    if weights is not None:
+        model.load_state_dict(weights.get_state_dict(progress=progress))
+        if weights == KeypointRCNN_ResNet50_FPN_Weights.COCO_V1:
+            overwrite_eps(model, 0.0)
+
     return model
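
Keypoint R-CNN is the one builder where the old API overloaded `pretrained` with a string; the lambda in its `handle_legacy_interface` decorator keeps that quirk working by mapping `pretrained="legacy"` to `COCO_LEGACY` and any other truthy value to `COCO_V1`. Under the new API the choice is explicit:

from torchvision.models.detection import (
    KeypointRCNN_ResNet50_FPN_Weights,
    keypointrcnn_resnet50_fpn,
)

# Explicit replacement for the old pretrained="legacy" call:
model = keypointrcnn_resnet50_fpn(
    weights=KeypointRCNN_ResNet50_FPN_Weights.COCO_LEGACY
)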
from collections import OrderedDict
from typing import Any, Optional

from torch import nn
from torchvision.ops import MultiScaleRoIAlign

from ...ops import misc as misc_nn_ops
from ...transforms._presets import ObjectDetection, InterpolationMode
from .._api import WeightsEnum, Weights
from .._meta import _COCO_CATEGORIES
from .._utils import handle_legacy_interface, _ovewrite_value_param
from ..resnet import ResNet50_Weights, resnet50
from ._utils import overwrite_eps
from .backbone_utils import _resnet_fpn_extractor, _validate_trainable_layers
from .faster_rcnn import FasterRCNN

__all__ = [
    "MaskRCNN",
    "MaskRCNN_ResNet50_FPN_Weights",
    "maskrcnn_resnet50_fpn",
]
@@ -112,7 +118,7 @@ class MaskRCNN(FasterRCNN):

        >>>
        >>> # load a pre-trained model for classification and return
        >>> # only the features
        >>> backbone = torchvision.models.mobilenet_v2(weights=MobileNet_V2_Weights.DEFAULT).features
        >>> # MaskRCNN needs to know the number of
        >>> # output channels in a backbone. For mobilenet_v2, it's 1280
        >>> # so we need to add it here

@@ -299,14 +305,38 @@ class MaskRCNNPredictor(nn.Sequential):

        # nn.init.constant_(param, 0)
class MaskRCNN_ResNet50_FPN_Weights(WeightsEnum):
    COCO_V1 = Weights(
        url="https://download.pytorch.org/models/maskrcnn_resnet50_fpn_coco-bf2d0c1e.pth",
        transforms=ObjectDetection,
        meta={
            "task": "image_object_detection",
            "architecture": "MaskRCNN",
            "publication_year": 2017,
            "num_params": 44401393,
            "categories": _COCO_CATEGORIES,
            "interpolation": InterpolationMode.BILINEAR,
            "recipe": "https://github.com/pytorch/vision/tree/main/references/detection#mask-r-cnn",
            "map": 37.9,
            "map_mask": 34.6,
        },
    )
    DEFAULT = COCO_V1
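
Quick sanity check of the enum semantics above (illustrative, not part of the diff): DEFAULT is a plain enum alias, so it resolves to the same member and the same metadata as COCO_V1.

from torchvision.models.detection import MaskRCNN_ResNet50_FPN_Weights

# Aliases point at the very same enum member...
assert MaskRCNN_ResNet50_FPN_Weights.DEFAULT is MaskRCNN_ResNet50_FPN_Weights.COCO_V1
# ...so the recorded metrics are reachable through either name.
print(MaskRCNN_ResNet50_FPN_Weights.DEFAULT.meta["map"])  # 37.9
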
@handle_legacy_interface(
    weights=("pretrained", MaskRCNN_ResNet50_FPN_Weights.COCO_V1),
    weights_backbone=("pretrained_backbone", ResNet50_Weights.IMAGENET1K_V1),
)
def maskrcnn_resnet50_fpn(
    *,
    weights: Optional[MaskRCNN_ResNet50_FPN_Weights] = None,
    progress: bool = True,
    num_classes: Optional[int] = None,
    weights_backbone: Optional[ResNet50_Weights] = ResNet50_Weights.IMAGENET1K_V1,
    trainable_backbone_layers: Optional[int] = None,
    **kwargs: Any,
) -> MaskRCNN:
    """
    Constructs a Mask R-CNN model with a ResNet-50-FPN backbone.

@@ -346,7 +376,7 @@ def maskrcnn_resnet50_fpn(

    Example::

        >>> model = torchvision.models.detection.maskrcnn_resnet50_fpn(weights=MaskRCNN_ResNet50_FPN_Weights.DEFAULT)
        >>> model.eval()
        >>> x = [torch.rand(3, 300, 400), torch.rand(3, 500, 400)]
        >>> predictions = model(x)

@@ -355,27 +385,34 @@ def maskrcnn_resnet50_fpn(

        >>> torch.onnx.export(model, x, "mask_rcnn.onnx", opset_version=11)

    Args:
        weights (MaskRCNN_ResNet50_FPN_Weights, optional): The pretrained weights for the model
        progress (bool): If True, displays a progress bar of the download to stderr
        num_classes (int, optional): number of output classes of the model (including the background)
        weights_backbone (ResNet50_Weights, optional): The pretrained weights for the backbone
        trainable_backbone_layers (int, optional): number of trainable (not frozen) layers starting from final block.
            Valid values are between 0 and 5, with 5 meaning all backbone layers are trainable. If ``None`` is
            passed (the default) this value is set to 3.
    """
    weights = MaskRCNN_ResNet50_FPN_Weights.verify(weights)
    weights_backbone = ResNet50_Weights.verify(weights_backbone)

    if weights is not None:
        weights_backbone = None
        num_classes = _ovewrite_value_param(num_classes, len(weights.meta["categories"]))
    elif num_classes is None:
        num_classes = 91

    is_trained = weights is not None or weights_backbone is not None
    trainable_backbone_layers = _validate_trainable_layers(is_trained, trainable_backbone_layers, 5, 3)
    norm_layer = misc_nn_ops.FrozenBatchNorm2d if is_trained else nn.BatchNorm2d

    backbone = resnet50(weights=weights_backbone, progress=progress, norm_layer=norm_layer)
    backbone = _resnet_fpn_extractor(backbone, trainable_backbone_layers)
    model = MaskRCNN(backbone, num_classes=num_classes, **kwargs)

    if weights is not None:
        model.load_state_dict(weights.get_state_dict(progress=progress))
        if weights == MaskRCNN_ResNet50_FPN_Weights.COCO_V1:
            overwrite_eps(model, 0.0)

    return model
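
Reviewer note (illustrative sketch, not part of the diff): the bundled meta replaces separate label files, so predictions can be decoded without extra assets.

import torch
from torchvision.models.detection import MaskRCNN_ResNet50_FPN_Weights, maskrcnn_resnet50_fpn

weights = MaskRCNN_ResNet50_FPN_Weights.COCO_V1
model = maskrcnn_resnet50_fpn(weights=weights).eval()

preprocess = weights.transforms()
prediction = model([preprocess(torch.rand(3, 300, 400))])[0]
# Map predicted label ids to COCO category names via the bundled metadata.
names = [weights.meta["categories"][int(i)] for i in prediction["labels"]]
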
import math
import warnings
from collections import OrderedDict
from typing import Any, Dict, List, Tuple, Optional

import torch
from torch import nn, Tensor

from ...ops import sigmoid_focal_loss
from ...ops import boxes as box_ops
from ...ops import misc as misc_nn_ops
from ...ops.feature_pyramid_network import LastLevelP6P7
from ...transforms._presets import ObjectDetection, InterpolationMode
from ...utils import _log_api_usage_once
from .._api import WeightsEnum, Weights
from .._meta import _COCO_CATEGORIES
from .._utils import handle_legacy_interface, _ovewrite_value_param
from ..resnet import ResNet50_Weights, resnet50
from . import _utils as det_utils
from ._utils import overwrite_eps
from .anchor_utils import AnchorGenerator

@@ -20,7 +23,11 @@ from .backbone_utils import _resnet_fpn_extractor, _validate_trainable_layers

from .transform import GeneralizedRCNNTransform

__all__ = [
    "RetinaNet",
    "RetinaNet_ResNet50_FPN_Weights",
    "retinanet_resnet50_fpn",
]
def _sum(x: List[Tensor]) -> Tensor:

@@ -286,7 +293,7 @@ class RetinaNet(nn.Module):

        >>> from torchvision.models.detection.anchor_utils import AnchorGenerator
        >>> # load a pre-trained model for classification and return
        >>> # only the features
        >>> backbone = torchvision.models.mobilenet_v2(weights=MobileNet_V2_Weights.DEFAULT).features
        >>> # RetinaNet needs to know the number of
        >>> # output channels in a backbone. For mobilenet_v2, it's 1280
        >>> # so we need to add it here

@@ -578,14 +585,37 @@ class RetinaNet(nn.Module):

        return self.eager_outputs(losses, detections)
class RetinaNet_ResNet50_FPN_Weights(WeightsEnum):
    COCO_V1 = Weights(
        url="https://download.pytorch.org/models/retinanet_resnet50_fpn_coco-eeacb38b.pth",
        transforms=ObjectDetection,
        meta={
            "task": "image_object_detection",
            "architecture": "RetinaNet",
            "publication_year": 2017,
            "num_params": 34014999,
            "categories": _COCO_CATEGORIES,
            "interpolation": InterpolationMode.BILINEAR,
            "recipe": "https://github.com/pytorch/vision/tree/main/references/detection#retinanet",
            "map": 36.4,
        },
    )
    DEFAULT = COCO_V1


@handle_legacy_interface(
    weights=("pretrained", RetinaNet_ResNet50_FPN_Weights.COCO_V1),
    weights_backbone=("pretrained_backbone", ResNet50_Weights.IMAGENET1K_V1),
)
def retinanet_resnet50_fpn(
    *,
    weights: Optional[RetinaNet_ResNet50_FPN_Weights] = None,
    progress: bool = True,
    num_classes: Optional[int] = None,
    weights_backbone: Optional[ResNet50_Weights] = ResNet50_Weights.IMAGENET1K_V1,
    trainable_backbone_layers: Optional[int] = None,
    **kwargs: Any,
) -> RetinaNet:
    """
    Constructs a RetinaNet model with a ResNet-50-FPN backbone.

@@ -619,36 +649,43 @@ def retinanet_resnet50_fpn(

    Example::

        >>> model = torchvision.models.detection.retinanet_resnet50_fpn(weights=RetinaNet_ResNet50_FPN_Weights.DEFAULT)
        >>> model.eval()
        >>> x = [torch.rand(3, 300, 400), torch.rand(3, 500, 400)]
        >>> predictions = model(x)

    Args:
        weights (RetinaNet_ResNet50_FPN_Weights, optional): The pretrained weights for the model
        progress (bool): If True, displays a progress bar of the download to stderr
        num_classes (int, optional): number of output classes of the model (including the background)
        weights_backbone (ResNet50_Weights, optional): The pretrained weights for the backbone
        trainable_backbone_layers (int, optional): number of trainable (not frozen) layers starting from final block.
            Valid values are between 0 and 5, with 5 meaning all backbone layers are trainable. If ``None`` is
            passed (the default) this value is set to 3.
    """
    weights = RetinaNet_ResNet50_FPN_Weights.verify(weights)
    weights_backbone = ResNet50_Weights.verify(weights_backbone)

    if weights is not None:
        weights_backbone = None
        num_classes = _ovewrite_value_param(num_classes, len(weights.meta["categories"]))
    elif num_classes is None:
        num_classes = 91

    is_trained = weights is not None or weights_backbone is not None
    trainable_backbone_layers = _validate_trainable_layers(is_trained, trainable_backbone_layers, 5, 3)
    norm_layer = misc_nn_ops.FrozenBatchNorm2d if is_trained else nn.BatchNorm2d

    backbone = resnet50(weights=weights_backbone, progress=progress, norm_layer=norm_layer)
    # skip P2 because it generates too many anchors (according to their paper)
    backbone = _resnet_fpn_extractor(
        backbone, trainable_backbone_layers, returned_layers=[2, 3, 4], extra_blocks=LastLevelP6P7(256, 256)
    )
    model = RetinaNet(backbone, num_classes, **kwargs)

    if weights is not None:
        model.load_state_dict(weights.get_state_dict(progress=progress))
        if weights == RetinaNet_ResNet50_FPN_Weights.COCO_V1:
            overwrite_eps(model, 0.0)

    return model
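
Reviewer note: `handle_legacy_interface` keeps the removed boolean kwargs working during the deprecation window. A sketch of the expected behaviour (the exact warning wording lives in the decorator and may differ):

import warnings
from torchvision.models.detection import retinanet_resnet50_fpn

with warnings.catch_warnings(record=True) as caught:
    warnings.simplefilter("always")
    # Old-style call: mapped to weights=RetinaNet_ResNet50_FPN_Weights.COCO_V1.
    model = retinanet_resnet50_fpn(pretrained=True)
assert any("pretrained" in str(w.message) for w in caught)
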
@@ -4,8 +4,7 @@ import torch

import torch.nn.functional as F
import torchvision
from torch import nn, Tensor
from torchvision.ops import boxes as box_ops, roi_align

from . import _utils as det_utils
@@ -6,27 +6,42 @@ import torch

import torch.nn.functional as F
from torch import nn, Tensor

from ...ops import boxes as box_ops
from ...transforms._presets import ObjectDetection, InterpolationMode
from ...utils import _log_api_usage_once
from .._api import WeightsEnum, Weights
from .._meta import _COCO_CATEGORIES
from .._utils import handle_legacy_interface, _ovewrite_value_param
from ..vgg import VGG, VGG16_Weights, vgg16
from . import _utils as det_utils
from .anchor_utils import DefaultBoxGenerator
from .backbone_utils import _validate_trainable_layers
from .transform import GeneralizedRCNNTransform

__all__ = [
    "SSD300_VGG16_Weights",
    "ssd300_vgg16",
]


class SSD300_VGG16_Weights(WeightsEnum):
    COCO_V1 = Weights(
        url="https://download.pytorch.org/models/ssd300_vgg16_coco-b556d3b4.pth",
        transforms=ObjectDetection,
        meta={
            "task": "image_object_detection",
            "architecture": "SSD",
            "publication_year": 2015,
            "num_params": 35641826,
            "size": (300, 300),
            "categories": _COCO_CATEGORIES,
            "interpolation": InterpolationMode.BILINEAR,
            "recipe": "https://github.com/pytorch/vision/tree/main/references/detection#ssd300-vgg16",
            "map": 25.1,
        },
    )
    DEFAULT = COCO_V1
def _xavier_init(conv: nn.Module):

@@ -528,7 +543,7 @@ class SSDFeatureExtractorVGG(nn.Module):

    return OrderedDict([(str(i), v) for i, v in enumerate(output)])


def _vgg_extractor(backbone: VGG, highres: bool, trainable_layers: int):
    backbone = backbone.features
    # Gather the indices of maxpools. These are the locations of output blocks.
    stage_indices = [0] + [i for i, b in enumerate(backbone) if isinstance(b, nn.MaxPool2d)][:-1]

@@ -546,14 +561,19 @@ def _vgg_extractor(backbone: vgg.VGG, highres: bool, trainable_layers: int):

    return SSDFeatureExtractorVGG(backbone, highres)
@handle_legacy_interface(
    weights=("pretrained", SSD300_VGG16_Weights.COCO_V1),
    weights_backbone=("pretrained_backbone", VGG16_Weights.IMAGENET1K_FEATURES),
)
def ssd300_vgg16(
    *,
    weights: Optional[SSD300_VGG16_Weights] = None,
    progress: bool = True,
    num_classes: Optional[int] = None,
    weights_backbone: Optional[VGG16_Weights] = VGG16_Weights.IMAGENET1K_FEATURES,
    trainable_backbone_layers: Optional[int] = None,
    **kwargs: Any,
) -> SSD:
    """Constructs an SSD model with input size 300x300 and a VGG16 backbone.

    Reference: `"SSD: Single Shot MultiBox Detector" <https://arxiv.org/abs/1512.02325>`_.

@@ -585,37 +605,38 @@ def ssd300_vgg16(

    Example:

        >>> model = torchvision.models.detection.ssd300_vgg16(weights=SSD300_VGG16_Weights.DEFAULT)
        >>> model.eval()
        >>> x = [torch.rand(3, 300, 300), torch.rand(3, 500, 400)]
        >>> predictions = model(x)

    Args:
        weights (SSD300_VGG16_Weights, optional): The pretrained weights for the model
        progress (bool): If True, displays a progress bar of the download to stderr
        num_classes (int, optional): number of output classes of the model (including the background)
        weights_backbone (VGG16_Weights, optional): The pretrained weights for the backbone
        trainable_backbone_layers (int, optional): number of trainable (not frozen) layers starting from final block.
            Valid values are between 0 and 5, with 5 meaning all backbone layers are trainable. If ``None`` is
            passed (the default) this value is set to 4.
    """
    weights = SSD300_VGG16_Weights.verify(weights)
    weights_backbone = VGG16_Weights.verify(weights_backbone)

    if "size" in kwargs:
        warnings.warn("The size of the model is already fixed; ignoring the parameter.")

    if weights is not None:
        weights_backbone = None
        num_classes = _ovewrite_value_param(num_classes, len(weights.meta["categories"]))
    elif num_classes is None:
        num_classes = 91

    trainable_backbone_layers = _validate_trainable_layers(
        weights is not None or weights_backbone is not None, trainable_backbone_layers, 5, 4
    )

    # Use custom backbones more appropriate for SSD
    backbone = vgg16(weights=weights_backbone, progress=progress)
    backbone = _vgg_extractor(backbone, False, trainable_backbone_layers)
    anchor_generator = DefaultBoxGenerator(
        [[2], [2, 3], [2, 3], [2, 3], [2], [2]],

@@ -628,12 +649,10 @@ def ssd300_vgg16(

        "image_mean": [0.48235, 0.45882, 0.40784],
        "image_std": [1.0 / 255.0, 1.0 / 255.0, 1.0 / 255.0],  # undo the 0-1 scaling of toTensor
    }
    kwargs: Any = {**defaults, **kwargs}
    model = SSD(backbone, anchor_generator, (300, 300), num_classes, **kwargs)

    if weights is not None:
        model.load_state_dict(weights.get_state_dict(progress=progress))

    return model
@@ -6,21 +6,24 @@ from typing import Any, Callable, Dict, List, Optional, Union

import torch
from torch import nn, Tensor

from ...ops.misc import Conv2dNormActivation
from ...transforms._presets import ObjectDetection, InterpolationMode
from ...utils import _log_api_usage_once
from .. import mobilenet
from .._api import WeightsEnum, Weights
from .._meta import _COCO_CATEGORIES
from .._utils import handle_legacy_interface, _ovewrite_value_param
from ..mobilenetv3 import MobileNet_V3_Large_Weights, mobilenet_v3_large
from . import _utils as det_utils
from .anchor_utils import DefaultBoxGenerator
from .backbone_utils import _validate_trainable_layers
from .ssd import SSD, SSDScoringHead

__all__ = [
    "SSDLite320_MobileNet_V3_Large_Weights",
    "ssdlite320_mobilenet_v3_large",
]
# Building blocks of SSDlite as described in section 6.2 of MobileNetV2 paper

@@ -181,15 +184,39 @@ def _mobilenet_extractor(

    return SSDLiteFeatureExtractorMobileNet(backbone, stage_indices[-2], norm_layer)
class SSDLite320_MobileNet_V3_Large_Weights(WeightsEnum):
    COCO_V1 = Weights(
        url="https://download.pytorch.org/models/ssdlite320_mobilenet_v3_large_coco-a79551df.pth",
        transforms=ObjectDetection,
        meta={
            "task": "image_object_detection",
            "architecture": "SSDLite",
            "publication_year": 2018,
            "num_params": 3440060,
            "size": (320, 320),
            "categories": _COCO_CATEGORIES,
            "interpolation": InterpolationMode.BILINEAR,
            "recipe": "https://github.com/pytorch/vision/tree/main/references/detection#ssdlite320-mobilenetv3-large",
            "map": 21.3,
        },
    )
    DEFAULT = COCO_V1
@handle_legacy_interface(
    weights=("pretrained", SSDLite320_MobileNet_V3_Large_Weights.COCO_V1),
    weights_backbone=("pretrained_backbone", MobileNet_V3_Large_Weights.IMAGENET1K_V1),
)
def ssdlite320_mobilenet_v3_large(
    *,
    weights: Optional[SSDLite320_MobileNet_V3_Large_Weights] = None,
    progress: bool = True,
    num_classes: Optional[int] = None,
    weights_backbone: Optional[MobileNet_V3_Large_Weights] = MobileNet_V3_Large_Weights.IMAGENET1K_V1,
    trainable_backbone_layers: Optional[int] = None,
    norm_layer: Optional[Callable[..., nn.Module]] = None,
    **kwargs: Any,
) -> SSD:
    """Constructs an SSDlite model with input size 320x320 and a MobileNetV3 Large backbone, as described at
    `"Searching for MobileNetV3"
    <https://arxiv.org/abs/1905.02244>`_ and

@@ -200,41 +227,47 @@ def ssdlite320_mobilenet_v3_large(

    Example:

        >>> model = torchvision.models.detection.ssdlite320_mobilenet_v3_large(weights=SSDLite320_MobileNet_V3_Large_Weights.DEFAULT)
        >>> model.eval()
        >>> x = [torch.rand(3, 320, 320), torch.rand(3, 500, 400)]
        >>> predictions = model(x)

    Args:
        weights (SSDLite320_MobileNet_V3_Large_Weights, optional): The pretrained weights for the model
        progress (bool): If True, displays a progress bar of the download to stderr
        num_classes (int, optional): number of output classes of the model (including the background)
        weights_backbone (MobileNet_V3_Large_Weights, optional): The pretrained weights for the backbone
        trainable_backbone_layers (int, optional): number of trainable (not frozen) layers starting from final block.
            Valid values are between 0 and 6, with 6 meaning all backbone layers are trainable. If ``None`` is
            passed (the default) this value is set to 6.
        norm_layer (callable, optional): Module specifying the normalization layer to use.
    """
    weights = SSDLite320_MobileNet_V3_Large_Weights.verify(weights)
    weights_backbone = MobileNet_V3_Large_Weights.verify(weights_backbone)

    if "size" in kwargs:
        warnings.warn("The size of the model is already fixed; ignoring the parameter.")

    if weights is not None:
        weights_backbone = None
        num_classes = _ovewrite_value_param(num_classes, len(weights.meta["categories"]))
    elif num_classes is None:
        num_classes = 91

    trainable_backbone_layers = _validate_trainable_layers(
        weights is not None or weights_backbone is not None, trainable_backbone_layers, 6, 6
    )

    # Enable reduced tail if no pretrained backbone is selected. See Table 6 of MobileNetV3 paper.
    reduce_tail = weights_backbone is None

    if norm_layer is None:
        norm_layer = partial(nn.BatchNorm2d, eps=0.001, momentum=0.03)

    backbone = mobilenet_v3_large(
        weights=weights_backbone, progress=progress, norm_layer=norm_layer, reduced_tail=reduce_tail, **kwargs
    )
    if weights_backbone is None:
        # Change the default initialization scheme if not pretrained
        _normal_init(backbone)
    backbone = _mobilenet_extractor(

@@ -262,7 +295,7 @@ def ssdlite320_mobilenet_v3_large(

        "image_mean": [0.5, 0.5, 0.5],
        "image_std": [0.5, 0.5, 0.5],
    }
    kwargs: Any = {**defaults, **kwargs}
    model = SSD(
        backbone,
        anchor_generator,

@@ -272,10 +305,7 @@ def ssdlite320_mobilenet_v3_large(

        **kwargs,
    )

    if weights is not None:
        model.load_state_dict(weights.get_state_dict(progress=progress))

    return model
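
The `_ovewrite_value_param` calls used throughout these builders enforce consistency between caller arguments and checkpoint metadata. A minimal sketch of the intended semantics (named `_ovewrite_value_param_sketch` here because it only approximates the helper in `_utils.py`):

from typing import Optional, TypeVar

V = TypeVar("V")

def _ovewrite_value_param_sketch(param: Optional[V], new_value: V) -> V:
    # None means "infer from the weights"; an explicit value must match them.
    if param is not None and param != new_value:
        raise ValueError(f"Expected {new_value} to match the checkpoint, got {param} instead.")
    return new_value

assert _ovewrite_value_param_sketch(None, 91) == 91
assert _ovewrite_value_param_sketch(91, 91) == 91  # a mismatch would raise
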
import warnings
from collections import namedtuple
from functools import partial
from typing import Optional, Tuple, List, Callable, Any

import torch

@@ -7,15 +8,15 @@ import torch.nn as nn

import torch.nn.functional as F
from torch import Tensor

from ..transforms._presets import ImageClassification, InterpolationMode
from ..utils import _log_api_usage_once
from ._api import WeightsEnum, Weights
from ._meta import _IMAGENET_CATEGORIES
from ._utils import handle_legacy_interface, _ovewrite_named_param

__all__ = ["GoogLeNet", "GoogLeNetOutputs", "_GoogLeNetOutputs", "GoogLeNet_Weights", "googlenet"]


GoogLeNetOutputs = namedtuple("GoogLeNetOutputs", ["logits", "aux_logits2", "aux_logits1"])
GoogLeNetOutputs.__annotations__ = {"logits": Tensor, "aux_logits2": Optional[Tensor], "aux_logits1": Optional[Tensor]}
@@ -274,38 +275,62 @@ class BasicConv2d(nn.Module):

        return F.relu(x, inplace=True)
class GoogLeNet_Weights(WeightsEnum):
    IMAGENET1K_V1 = Weights(
        url="https://download.pytorch.org/models/googlenet-1378be20.pth",
        transforms=partial(ImageClassification, crop_size=224),
        meta={
            "task": "image_classification",
            "architecture": "GoogLeNet",
            "publication_year": 2014,
            "num_params": 6624904,
            "size": (224, 224),
            "min_size": (15, 15),
            "categories": _IMAGENET_CATEGORIES,
            "interpolation": InterpolationMode.BILINEAR,
            "recipe": "https://github.com/pytorch/vision/tree/main/references/classification#googlenet",
            "acc@1": 69.778,
            "acc@5": 89.530,
        },
    )
    DEFAULT = IMAGENET1K_V1


@handle_legacy_interface(weights=("pretrained", GoogLeNet_Weights.IMAGENET1K_V1))
def googlenet(*, weights: Optional[GoogLeNet_Weights] = None, progress: bool = True, **kwargs: Any) -> GoogLeNet:
    r"""GoogLeNet (Inception v1) model architecture from
    `"Going Deeper with Convolutions" <http://arxiv.org/abs/1409.4842>`_.
    The required minimum input size of the model is 15x15.

    Args:
        weights (GoogLeNet_Weights, optional): The pretrained weights for the model
        progress (bool): If True, displays a progress bar of the download to stderr
        aux_logits (bool): If True, adds two auxiliary branches that can improve training.
            Default: *False* when pretrained is True otherwise *True*
        transform_input (bool): If True, preprocesses the input according to the method with which it
            was trained on ImageNet. Default: True if ``weights=GoogLeNet_Weights.IMAGENET1K_V1``, else False.
    """
    weights = GoogLeNet_Weights.verify(weights)

    original_aux_logits = kwargs.get("aux_logits", False)
    if weights is not None:
        if "transform_input" not in kwargs:
            _ovewrite_named_param(kwargs, "transform_input", True)
        _ovewrite_named_param(kwargs, "aux_logits", True)
        _ovewrite_named_param(kwargs, "init_weights", False)
        _ovewrite_named_param(kwargs, "num_classes", len(weights.meta["categories"]))

    model = GoogLeNet(**kwargs)

    if weights is not None:
        model.load_state_dict(weights.get_state_dict(progress=progress))
        if not original_aux_logits:
            model.aux_logits = False
            model.aux1 = None  # type: ignore[assignment]
            model.aux2 = None  # type: ignore[assignment]
        else:
            warnings.warn(
                "auxiliary heads in the pretrained googlenet model are NOT pretrained, so make sure to train them"
            )

    return model
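
Classification sketch against the new enum (illustrative; category names come from the bundled meta rather than a separate labels file):

import torch
from torchvision.models import GoogLeNet_Weights, googlenet

weights = GoogLeNet_Weights.IMAGENET1K_V1
model = googlenet(weights=weights).eval()

preprocess = weights.transforms()
batch = preprocess(torch.rand(3, 256, 256)).unsqueeze(0)

with torch.no_grad():
    probs = model(batch).softmax(dim=1)
score, idx = probs[0].max(dim=0)
print(weights.meta["categories"][int(idx)], float(score))
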
import warnings
from collections import namedtuple
from functools import partial
from typing import Callable, Any, Optional, Tuple, List

import torch
import torch.nn.functional as F
from torch import nn, Tensor

from ..transforms._presets import ImageClassification, InterpolationMode
from ..utils import _log_api_usage_once
from ._api import WeightsEnum, Weights
from ._meta import _IMAGENET_CATEGORIES
from ._utils import handle_legacy_interface, _ovewrite_named_param

__all__ = ["Inception3", "InceptionOutputs", "_InceptionOutputs", "Inception_V3_Weights", "inception_v3"]


InceptionOutputs = namedtuple("InceptionOutputs", ["logits", "aux_logits"])
InceptionOutputs.__annotations__ = {"logits": Tensor, "aux_logits": Optional[Tensor]}
@@ -408,7 +407,29 @@ class BasicConv2d(nn.Module):

        return F.relu(x, inplace=True)
class Inception_V3_Weights(WeightsEnum):
    IMAGENET1K_V1 = Weights(
        url="https://download.pytorch.org/models/inception_v3_google-0cc3c7bd.pth",
        transforms=partial(ImageClassification, crop_size=299, resize_size=342),
        meta={
            "task": "image_classification",
            "architecture": "InceptionV3",
            "publication_year": 2015,
            "num_params": 27161264,
            "size": (299, 299),
            "min_size": (75, 75),
            "categories": _IMAGENET_CATEGORIES,
            "interpolation": InterpolationMode.BILINEAR,
            "recipe": "https://github.com/pytorch/vision/tree/main/references/classification#inception-v3",
            "acc@1": 77.294,
            "acc@5": 93.450,
        },
    )
    DEFAULT = IMAGENET1K_V1


@handle_legacy_interface(weights=("pretrained", Inception_V3_Weights.IMAGENET1K_V1))
def inception_v3(*, weights: Optional[Inception_V3_Weights] = None, progress: bool = True, **kwargs: Any) -> Inception3:
    r"""Inception v3 model architecture from
    `"Rethinking the Inception Architecture for Computer Vision" <http://arxiv.org/abs/1512.00567>`_.
    The required minimum input size of the model is 75x75.

@@ -418,28 +439,29 @@ def inception_v3(

    N x 3 x 299 x 299, so ensure your images are sized accordingly.

    Args:
        weights (Inception_V3_Weights, optional): The pretrained weights for the model
        progress (bool): If True, displays a progress bar of the download to stderr
        aux_logits (bool): If True, add an auxiliary branch that can improve training.
            Default: *True*
        transform_input (bool): If True, preprocesses the input according to the method with which it
            was trained on ImageNet. Default: True if ``weights=Inception_V3_Weights.IMAGENET1K_V1``, else False.
    """
    weights = Inception_V3_Weights.verify(weights)

    original_aux_logits = kwargs.get("aux_logits", True)
    if weights is not None:
        if "transform_input" not in kwargs:
            _ovewrite_named_param(kwargs, "transform_input", True)
        _ovewrite_named_param(kwargs, "aux_logits", True)
        _ovewrite_named_param(kwargs, "init_weights", False)
        _ovewrite_named_param(kwargs, "num_classes", len(weights.meta["categories"]))

    model = Inception3(**kwargs)

    if weights is not None:
        model.load_state_dict(weights.get_state_dict(progress=progress))
        if not original_aux_logits:
            model.aux_logits = False
            model.AuxLogits = None

    return model
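
One behavioural detail worth verifying in review: with pretrained weights the builder forces `aux_logits=True` so the checkpoint loads, and since `aux_logits` defaults to True for inception_v3 the branch is kept afterwards. A sketch of the expected state, assuming the flow above:

from torchvision.models import Inception_V3_Weights, inception_v3

model = inception_v3(weights=Inception_V3_Weights.IMAGENET1K_V1)
# The aux branch exists and carries checkpoint weights; a caller passing
# aux_logits=False together with weights would instead trip _ovewrite_named_param.
assert model.aux_logits and model.AuxLogits is not None
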
import warnings
from functools import partial
from typing import Any, Dict, List, Optional

import torch
import torch.nn as nn
from torch import Tensor

from ..transforms._presets import ImageClassification, InterpolationMode
from ..utils import _log_api_usage_once
from ._api import WeightsEnum, Weights
from ._meta import _IMAGENET_CATEGORIES
from ._utils import handle_legacy_interface, _ovewrite_named_param

__all__ = [
    "MNASNet",
    "MNASNet0_5_Weights",
    "MNASNet0_75_Weights",
    "MNASNet1_0_Weights",
    "MNASNet1_3_Weights",
    "mnasnet0_5",
    "mnasnet0_75",
    "mnasnet1_0",
    "mnasnet1_3",
]
# Paper suggests 0.9997 momentum, for TensorFlow. Equivalent PyTorch momentum is
# 1.0 - tensorflow.

@@ -202,68 +211,123 @@ class MNASNet(torch.nn.Module):

        )
_COMMON_META = {
    "task": "image_classification",
    "architecture": "MNASNet",
    "publication_year": 2018,
    "size": (224, 224),
    "min_size": (1, 1),
    "categories": _IMAGENET_CATEGORIES,
    "interpolation": InterpolationMode.BILINEAR,
    "recipe": "https://github.com/1e100/mnasnet_trainer",
}


class MNASNet0_5_Weights(WeightsEnum):
    IMAGENET1K_V1 = Weights(
        url="https://download.pytorch.org/models/mnasnet0.5_top1_67.823-3ffadce67e.pth",
        transforms=partial(ImageClassification, crop_size=224),
        meta={
            **_COMMON_META,
            "num_params": 2218512,
            "acc@1": 67.734,
            "acc@5": 87.490,
        },
    )
    DEFAULT = IMAGENET1K_V1


class MNASNet0_75_Weights(WeightsEnum):
    # If a default model is added here the corresponding changes need to be done in mnasnet0_75
    pass


class MNASNet1_0_Weights(WeightsEnum):
    IMAGENET1K_V1 = Weights(
        url="https://download.pytorch.org/models/mnasnet1.0_top1_73.512-f206786ef8.pth",
        transforms=partial(ImageClassification, crop_size=224),
        meta={
            **_COMMON_META,
            "num_params": 4383312,
            "acc@1": 73.456,
            "acc@5": 91.510,
        },
    )
    DEFAULT = IMAGENET1K_V1


class MNASNet1_3_Weights(WeightsEnum):
    # If a default model is added here the corresponding changes need to be done in mnasnet1_3
    pass


def _mnasnet(alpha: float, weights: Optional[WeightsEnum], progress: bool, **kwargs: Any) -> MNASNet:
    if weights is not None:
        _ovewrite_named_param(kwargs, "num_classes", len(weights.meta["categories"]))

    model = MNASNet(alpha, **kwargs)

    if weights:
        model.load_state_dict(weights.get_state_dict(progress=progress))

    return model


@handle_legacy_interface(weights=("pretrained", MNASNet0_5_Weights.IMAGENET1K_V1))
def mnasnet0_5(*, weights: Optional[MNASNet0_5_Weights] = None, progress: bool = True, **kwargs: Any) -> MNASNet:
    r"""MNASNet with depth multiplier of 0.5 from
    `"MnasNet: Platform-Aware Neural Architecture Search for Mobile"
    <https://arxiv.org/pdf/1807.11626.pdf>`_.

    Args:
        weights (MNASNet0_5_Weights, optional): The pretrained weights for the model
        progress (bool): If True, displays a progress bar of the download to stderr
    """
    weights = MNASNet0_5_Weights.verify(weights)

    return _mnasnet(0.5, weights, progress, **kwargs)


@handle_legacy_interface(weights=("pretrained", None))
def mnasnet0_75(*, weights: Optional[MNASNet0_75_Weights] = None, progress: bool = True, **kwargs: Any) -> MNASNet:
    r"""MNASNet with depth multiplier of 0.75 from
    `"MnasNet: Platform-Aware Neural Architecture Search for Mobile"
    <https://arxiv.org/pdf/1807.11626.pdf>`_.

    Args:
        weights (MNASNet0_75_Weights, optional): The pretrained weights for the model
        progress (bool): If True, displays a progress bar of the download to stderr
    """
    weights = MNASNet0_75_Weights.verify(weights)

    return _mnasnet(0.75, weights, progress, **kwargs)


@handle_legacy_interface(weights=("pretrained", MNASNet1_0_Weights.IMAGENET1K_V1))
def mnasnet1_0(*, weights: Optional[MNASNet1_0_Weights] = None, progress: bool = True, **kwargs: Any) -> MNASNet:
    r"""MNASNet with depth multiplier of 1.0 from
    `"MnasNet: Platform-Aware Neural Architecture Search for Mobile"
    <https://arxiv.org/pdf/1807.11626.pdf>`_.

    Args:
        weights (MNASNet1_0_Weights, optional): The pretrained weights for the model
        progress (bool): If True, displays a progress bar of the download to stderr
    """
    weights = MNASNet1_0_Weights.verify(weights)

    return _mnasnet(1.0, weights, progress, **kwargs)


@handle_legacy_interface(weights=("pretrained", None))
def mnasnet1_3(*, weights: Optional[MNASNet1_3_Weights] = None, progress: bool = True, **kwargs: Any) -> MNASNet:
    r"""MNASNet with depth multiplier of 1.3 from
    `"MnasNet: Platform-Aware Neural Architecture Search for Mobile"
    <https://arxiv.org/pdf/1807.11626.pdf>`_.

    Args:
        weights (MNASNet1_3_Weights, optional): The pretrained weights for the model
        progress (bool): If True, displays a progress bar of the download to stderr
    """
    weights = MNASNet1_3_Weights.verify(weights)

    return _mnasnet(1.3, weights, progress, **kwargs)
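
Reviewer note: the four public builders now funnel through `_mnasnet`, so constructing an unpublished width for experiments is a one-liner around the private helper (hypothetical alpha, shown only to illustrate the refactoring; no checkpoint exists, so weights must stay None):

from torchvision.models.mnasnet import MNASNet, _mnasnet

model: MNASNet = _mnasnet(0.62, weights=None, progress=True, num_classes=10)
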
from .mobilenetv2 import *  # noqa: F401, F403
from .mobilenetv3 import *  # noqa: F401, F403
from .mobilenetv2 import __all__ as mv2_all
from .mobilenetv3 import __all__ as mv3_all

__all__ = mv2_all + mv3_all
import warnings
from functools import partial
from typing import Callable, Any, Optional, List

import torch
from torch import Tensor
from torch import nn

from ..ops.misc import Conv2dNormActivation
from ..transforms._presets import ImageClassification, InterpolationMode
from ..utils import _log_api_usage_once
from ._api import WeightsEnum, Weights
from ._meta import _IMAGENET_CATEGORIES
from ._utils import handle_legacy_interface, _ovewrite_named_param, _make_divisible

__all__ = ["MobileNetV2", "MobileNet_V2_Weights", "mobilenet_v2"]
# necessary for backwards compatibility

@@ -196,17 +194,62 @@ class MobileNetV2(nn.Module):

        return self._forward_impl(x)
_COMMON_META = {
    "task": "image_classification",
    "architecture": "MobileNetV2",
    "publication_year": 2018,
    "num_params": 3504872,
    "size": (224, 224),
    "min_size": (1, 1),
    "categories": _IMAGENET_CATEGORIES,
    "interpolation": InterpolationMode.BILINEAR,
}


class MobileNet_V2_Weights(WeightsEnum):
    IMAGENET1K_V1 = Weights(
        url="https://download.pytorch.org/models/mobilenet_v2-b0353104.pth",
        transforms=partial(ImageClassification, crop_size=224),
        meta={
            **_COMMON_META,
            "recipe": "https://github.com/pytorch/vision/tree/main/references/classification#mobilenetv2",
            "acc@1": 71.878,
            "acc@5": 90.286,
        },
    )
    IMAGENET1K_V2 = Weights(
        url="https://download.pytorch.org/models/mobilenet_v2-7ebf99e0.pth",
        transforms=partial(ImageClassification, crop_size=224, resize_size=232),
        meta={
            **_COMMON_META,
            "recipe": "https://github.com/pytorch/vision/issues/3995#new-recipe-with-reg-tuning",
            "acc@1": 72.154,
            "acc@5": 90.822,
        },
    )
    DEFAULT = IMAGENET1K_V2


@handle_legacy_interface(weights=("pretrained", MobileNet_V2_Weights.IMAGENET1K_V1))
def mobilenet_v2(
    *, weights: Optional[MobileNet_V2_Weights] = None, progress: bool = True, **kwargs: Any
) -> MobileNetV2:
    """
    Constructs a MobileNetV2 architecture from
    `"MobileNetV2: Inverted Residuals and Linear Bottlenecks" <https://arxiv.org/abs/1801.04381>`_.

    Args:
        weights (MobileNet_V2_Weights, optional): The pretrained weights for the model
        progress (bool): If True, displays a progress bar of the download to stderr
    """
    weights = MobileNet_V2_Weights.verify(weights)

    if weights is not None:
        _ovewrite_named_param(kwargs, "num_classes", len(weights.meta["categories"]))

    model = MobileNetV2(**kwargs)

    if weights is not None:
        model.load_state_dict(weights.get_state_dict(progress=progress))

    return model
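
Since MobileNet_V2_Weights now ships two checkpoints with DEFAULT pointing at the newer recipe, the trade-off is inspectable from the enum itself (a sketch; the accuracies printed are the ones recorded in the meta above):

from torchvision.models import MobileNet_V2_Weights

for w in MobileNet_V2_Weights:  # iteration skips the DEFAULT alias
    print(w.name, w.meta["acc@1"], w.url)
# Legacy pretrained=True calls keep resolving to IMAGENET1K_V1 via
# handle_legacy_interface, while DEFAULT picks the improved IMAGENET1K_V2.
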
from .raft import *