Unverified commit e35793a1, authored by Vasilis Vryniotis, committed by GitHub

Cherry-picking cleanups for SSD and SSDlite. (#3818)

parent 6374cff2
......@@ -426,8 +426,8 @@ Faster R-CNN ResNet-50 FPN 37.0 - -
Faster R-CNN MobileNetV3-Large FPN 32.8 - -
Faster R-CNN MobileNetV3-Large 320 FPN 22.8 - -
RetinaNet ResNet-50 FPN 36.4 - -
SSD VGG16 25.1 - -
SSDlite MobileNetV3-Large 21.3 - -
SSD300 VGG16 25.1 - -
SSDlite320 MobileNetV3-Large 21.3 - -
Mask R-CNN ResNet-50 FPN 37.9 34.6 -
====================================== ======= ======== ===========
......@@ -486,8 +486,8 @@ Faster R-CNN ResNet-50 FPN 0.2288 0.0590
Faster R-CNN MobileNetV3-Large FPN 0.1020 0.0415 1.0
Faster R-CNN MobileNetV3-Large 320 FPN 0.0978 0.0376 0.6
RetinaNet ResNet-50 FPN 0.2514 0.0939 4.1
SSD VGG16 0.2093 0.0744 1.5
SSDlite MobileNetV3-Large 0.1773 0.0906 1.5
SSD300 VGG16 0.2093 0.0744 1.5
SSDlite320 MobileNetV3-Large 0.1773 0.0906 1.5
Mask R-CNN ResNet-50 FPN 0.2728 0.0903 5.4
Keypoint R-CNN ResNet-50 FPN 0.3789 0.1242 6.8
====================================== =================== ================== ===========
......@@ -502,19 +502,19 @@ Faster R-CNN
RetinaNet
------------
---------
.. autofunction:: torchvision.models.detection.retinanet_resnet50_fpn
SSD
------------
---
.. autofunction:: torchvision.models.detection.ssd300_vgg16
SSDlite
------------
-------
.. autofunction:: torchvision.models.detection.ssdlite320_mobilenet_v3_large
......
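Following the renamed entries in the models and docs tables above, a minimal inference sketch with the two constructors (it assumes a torchvision build that ships the pretrained COCO weights; the image size is arbitrary, since resizing to the fixed 300/320 input happens inside the model's transform):

```python
import torch
import torchvision

# Both renamed entry points keep the usual detection API: a list of 3xHxW
# float tensors in [0, 1]; each output is a dict with boxes/scores/labels.
ssd = torchvision.models.detection.ssd300_vgg16(pretrained=True).eval()
ssdlite = torchvision.models.detection.ssdlite320_mobilenet_v3_large(pretrained=True).eval()

images = [torch.rand(3, 480, 640)]  # dummy image purely for illustration
with torch.no_grad():
    ssd_out = ssd(images)
    ssdlite_out = ssdlite(images)

print(ssd_out[0]["boxes"].shape, ssd_out[0]["scores"].shape)
```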
......@@ -48,7 +48,7 @@ python -m torch.distributed.launch --nproc_per_node=8 --use_env train.py\
--lr-steps 16 22 --aspect-ratio-group-factor 3 --lr 0.01
```
### SSD VGG16
### SSD300 VGG16
```
python -m torch.distributed.launch --nproc_per_node=8 --use_env train.py\
--dataset coco --model ssd300_vgg16 --epochs 120\
......@@ -56,7 +56,7 @@ python -m torch.distributed.launch --nproc_per_node=8 --use_env train.py\
--weight-decay 0.0005 --data-augmentation ssd
```
### SSDlite MobileNetV3-Large
### SSDlite320 MobileNetV3-Large
```
python -m torch.distributed.launch --nproc_per_node=8 --use_env train.py\
--dataset coco --model ssdlite320_mobilenet_v3_large --epochs 660\
......
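For completeness, a hedged sketch of how a checkpoint produced by the training commands above could be loaded back for evaluation; the checkpoint key ("model") and the file name are assumptions based on the reference script's usual output format:

```python
import torch
import torchvision

# Assumption: the reference training script stores a dict whose "model" entry
# is the state_dict, in files such as checkpoint.pth / model_<epoch>.pth.
model = torchvision.models.detection.ssd300_vgg16(num_classes=91)
state = torch.load("checkpoint.pth", map_location="cpu")  # illustrative path
model.load_state_dict(state["model"])
model.eval()
```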
......@@ -410,7 +410,7 @@ class SSD(nn.Module):
class SSDFeatureExtractorVGG(nn.Module):
def __init__(self, backbone: nn.Module, highres: bool, rescaling: bool):
def __init__(self, backbone: nn.Module, highres: bool):
super().__init__()
_, _, maxpool3_pos, maxpool4_pos, _ = (i for i, layer in enumerate(backbone) if isinstance(layer, nn.MaxPool2d))
......@@ -476,13 +476,8 @@ class SSDFeatureExtractorVGG(nn.Module):
fc,
))
self.extra = extra
self.rescaling = rescaling
def forward(self, x: Tensor) -> Dict[str, Tensor]:
# Undo the 0-1 scaling of toTensor. Necessary for some backbones.
if self.rescaling:
x *= 255
# L2 normalization + rescaling of the 1st block's feature map
x = self.features(x)
rescaled = self.scale_weight.view(1, -1, 1, 1) * F.normalize(x)
......@@ -496,8 +491,7 @@ class SSDFeatureExtractorVGG(nn.Module):
return OrderedDict([(str(i), v) for i, v in enumerate(output)])
def _vgg_extractor(backbone_name: str, highres: bool, progress: bool, pretrained: bool, trainable_layers: int,
rescaling: bool):
def _vgg_extractor(backbone_name: str, highres: bool, progress: bool, pretrained: bool, trainable_layers: int):
if backbone_name in backbone_urls:
# Use custom backbones more appropriate for SSD
arch = backbone_name.split('_')[0]
......@@ -521,19 +515,19 @@ def _vgg_extractor(backbone_name: str, highres: bool, progress: bool, pretrained
for parameter in b.parameters():
parameter.requires_grad_(False)
return SSDFeatureExtractorVGG(backbone, highres, rescaling)
return SSDFeatureExtractorVGG(backbone, highres)
def ssd300_vgg16(pretrained: bool = False, progress: bool = True, num_classes: int = 91,
pretrained_backbone: bool = True, trainable_backbone_layers: Optional[int] = None, **kwargs: Any):
"""
Constructs an SSD model with a VGG16 backbone. See `SSD` for more details.
Constructs an SSD model with input size 300x300 and a VGG16 backbone. See `SSD` for more details.
Example:
>>> model = torchvision.models.detection.ssd300_vgg16(pretrained=True)
>>> model.eval()
>>> x = [torch.rand(3, 300, 400), torch.rand(3, 500, 400)]
>>> x = [torch.rand(3, 300, 300), torch.rand(3, 500, 400)]
>>> predictions = model(x)
Args:
......@@ -544,6 +538,9 @@ def ssd300_vgg16(pretrained: bool = False, progress: bool = True, num_classes: i
trainable_backbone_layers (int): number of trainable (not frozen) backbone layers starting from final block.
Valid values are between 0 and 5, with 5 meaning all backbone layers are trainable.
"""
if "size" in kwargs:
warnings.warn("The size of the model is already fixed; ignoring the argument.")
trainable_backbone_layers = _validate_trainable_layers(
pretrained or pretrained_backbone, trainable_backbone_layers, 5, 5)
......@@ -551,12 +548,18 @@ def ssd300_vgg16(pretrained: bool = False, progress: bool = True, num_classes: i
# no need to download the backbone if pretrained is set
pretrained_backbone = False
backbone = _vgg_extractor("vgg16_features", False, progress, pretrained_backbone, trainable_backbone_layers, True)
backbone = _vgg_extractor("vgg16_features", False, progress, pretrained_backbone, trainable_backbone_layers)
anchor_generator = DefaultBoxGenerator([[2], [2, 3], [2, 3], [2, 3], [2], [2]],
scales=[0.07, 0.15, 0.33, 0.51, 0.69, 0.87, 1.05],
steps=[8, 16, 32, 64, 100, 300])
model = SSD(backbone, anchor_generator, (300, 300), num_classes,
image_mean=[0.48235, 0.45882, 0.40784], image_std=[1., 1., 1.], **kwargs)
defaults = {
# Rescale the input in a way compatible with the backbone
"image_mean": [0.48235, 0.45882, 0.40784],
"image_std": [1.0 / 255.0, 1.0 / 255.0, 1.0 / 255.0], # undo the 0-1 scaling of toTensor
}
kwargs = {**defaults, **kwargs}
model = SSD(backbone, anchor_generator, (300, 300), num_classes, **kwargs)
if pretrained:
weights_name = 'ssd300_vgg16_coco'
if model_urls.get(weights_name, None) is None:
......
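The ssd.py hunks above move the VGG-specific 0-255 rescaling out of the feature extractor (the removed `rescaling` flag and `x *= 255`) and into the transform defaults via `image_std = 1/255`. A small sketch, using only local names, checking that the two formulations agree; note that `kwargs = {**defaults, **kwargs}` still lets callers override these defaults, since user-supplied values win the merge:

```python
import torch

x = torch.rand(3, 300, 300)  # input already scaled to [0, 1] by ToTensor
mean = torch.tensor([0.48235, 0.45882, 0.40784]).view(3, 1, 1)

# Old path: the transform subtracts the mean (std = 1), the backbone multiplies by 255.
old = (x - mean) * 255.0

# New path: the transform itself divides by std = 1/255, i.e. multiplies by 255.
std = torch.full((3, 1, 1), 1.0 / 255.0)
new = (x - mean) / std

assert torch.allclose(old, new)
```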
import torch
import warnings
from collections import OrderedDict
from functools import partial
......@@ -94,8 +95,7 @@ class SSDLiteRegressionHead(SSDScoringHead):
class SSDLiteFeatureExtractorMobileNet(nn.Module):
def __init__(self, backbone: nn.Module, c4_pos: int, norm_layer: Callable[..., nn.Module], rescaling: bool,
**kwargs: Any):
def __init__(self, backbone: nn.Module, c4_pos: int, norm_layer: Callable[..., nn.Module], **kwargs: Any):
super().__init__()
# non-public config parameters
min_depth = kwargs.pop('_min_depth', 16)
......@@ -117,13 +117,8 @@ class SSDLiteFeatureExtractorMobileNet(nn.Module):
_normal_init(extra)
self.extra = extra
self.rescaling = rescaling
def forward(self, x: Tensor) -> Dict[str, Tensor]:
# Rescale from [0, 1] to [-1, 1]
if self.rescaling:
x = 2.0 * x - 1.0
# Get feature maps from backbone and extra. Can't be refactored due to JIT limitations.
output = []
for block in self.features:
......@@ -138,7 +133,7 @@ class SSDLiteFeatureExtractorMobileNet(nn.Module):
def _mobilenet_extractor(backbone_name: str, progress: bool, pretrained: bool, trainable_layers: int,
norm_layer: Callable[..., nn.Module], rescaling: bool, **kwargs: Any):
norm_layer: Callable[..., nn.Module], **kwargs: Any):
backbone = mobilenet.__dict__[backbone_name](pretrained=pretrained, progress=progress,
norm_layer=norm_layer, **kwargs).features
if not pretrained:
......@@ -158,7 +153,7 @@ def _mobilenet_extractor(backbone_name: str, progress: bool, pretrained: bool, t
for parameter in b.parameters():
parameter.requires_grad_(False)
return SSDLiteFeatureExtractorMobileNet(backbone, stage_indices[-2], norm_layer, rescaling, **kwargs)
return SSDLiteFeatureExtractorMobileNet(backbone, stage_indices[-2], norm_layer, **kwargs)
def ssdlite320_mobilenet_v3_large(pretrained: bool = False, progress: bool = True, num_classes: int = 91,
......@@ -166,7 +161,7 @@ def ssdlite320_mobilenet_v3_large(pretrained: bool = False, progress: bool = Tru
norm_layer: Optional[Callable[..., nn.Module]] = None,
**kwargs: Any):
"""
Constructs an SSDlite model with a MobileNetV3 Large backbone. See `SSD` for more details.
Constructs an SSDlite model with input size 320x320 and a MobileNetV3 Large backbone. See `SSD` for more details.
Example:
......@@ -186,20 +181,23 @@ def ssdlite320_mobilenet_v3_large(pretrained: bool = False, progress: bool = Tru
Valid values are between 0 and 6, with 6 meaning all backbone layers are trainable.
norm_layer (callable, optional): Module specifying the normalization layer to use.
"""
if "size" in kwargs:
warnings.warn("The size of the model is already fixed; ignoring the argument.")
trainable_backbone_layers = _validate_trainable_layers(
pretrained or pretrained_backbone, trainable_backbone_layers, 6, 6)
if pretrained:
pretrained_backbone = False
# Enable [-1, 1] rescaling and reduced tail if no pretrained backbone is selected
rescaling = reduce_tail = not pretrained_backbone
# Enable reduced tail if no pretrained backbone is selected
reduce_tail = not pretrained_backbone
if norm_layer is None:
norm_layer = partial(nn.BatchNorm2d, eps=0.001, momentum=0.03)
backbone = _mobilenet_extractor("mobilenet_v3_large", progress, pretrained_backbone, trainable_backbone_layers,
norm_layer, rescaling, _reduced_tail=reduce_tail, _width_mult=1.0)
norm_layer, _reduced_tail=reduce_tail, _width_mult=1.0)
size = (320, 320)
anchor_generator = DefaultBoxGenerator([[2, 3] for _ in range(6)], min_ratio=0.2, max_ratio=0.95)
......@@ -212,8 +210,10 @@ def ssdlite320_mobilenet_v3_large(pretrained: bool = False, progress: bool = Tru
"nms_thresh": 0.55,
"detections_per_img": 300,
"topk_candidates": 300,
"image_mean": [0., 0., 0.],
"image_std": [1., 1., 1.],
# Rescale the input in a way compatible with the backbone:
# The following mean/std rescale the data from [0, 1] to [-1, 1]
"image_mean": [0.5, 0.5, 0.5],
"image_std": [0.5, 0.5, 0.5],
}
kwargs = {**defaults, **kwargs}
model = SSD(backbone, anchor_generator, size, num_classes,
......
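Likewise for ssdlite.py: the removed in-backbone `2.0 * x - 1.0` rescaling is reproduced by the new `image_mean = image_std = 0.5` defaults, because `(x - 0.5) / 0.5 == 2 * x - 1`. A quick check with local names only:

```python
import torch

x = torch.rand(3, 320, 320)  # [0, 1] input from ToTensor

old = 2.0 * x - 1.0    # previous rescaling inside the feature extractor
new = (x - 0.5) / 0.5  # transform-side rescaling with mean = std = 0.5

assert torch.allclose(old, new)  # both map [0, 1] to [-1, 1]
```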