Unverified Commit 7d4bdd43 authored by Hu Ye, committed by GitHub

add FCOS (#4961)



* add fcos

* update fcos

* add giou_loss

* add BoxLinearCoder for FCOS

* add full code for FCOS

* add giou loss

* add fcos

* add __all__

* Fixing lint

* Fixing lint in giou_loss.py

* Add typing annotation to fcos

* Add trained checkpoints

* Use partial to replace lambda

* Minor fixes to docstrings

* Apply ufmt format

* Fixing docstrings

* Fixing jit scripting

* Minor fixes to docstrings

* Fixing jit scripting

* Ignore mypy in fcos

* Fixing trained checkpoints

* Fixing unit-test of jit script

* Fixing docstrings

* Add test/expect/ModelTester.test_fcos_resnet50_fpn_expect.pkl

* Fixing test_detection_model_trainable_backbone_layers

* Update test_fcos_resnet50_fpn_expect.pkl

* rename stride to box size

* remove TODO and fix some typos

* merge some code for better readability

* improve the comments

* remove decode and encode of BoxLinearCoder

* remove some unnecessary hints

* use the default value in detectron2.

* update doc

* Add unittest for BoxLinearCoder

* Add types in FCOS

* Add docstring for BoxLinearCoder

* Minor fix for the docstring

* update doc

* Update fcos_resnet50_fpn_coco pretrained weights URL

* Update torchvision/models/detection/fcos.py
Co-authored-by: Vasilis Vryniotis <datumbox@users.noreply.github.com>

* Update torchvision/models/detection/fcos.py
Co-authored-by: Vasilis Vryniotis <datumbox@users.noreply.github.com>

* Update torchvision/models/detection/fcos.py
Co-authored-by: Vasilis Vryniotis <datumbox@users.noreply.github.com>

* Update torchvision/models/detection/fcos.py
Co-authored-by: Vasilis Vryniotis <datumbox@users.noreply.github.com>

* Add FCOS model documentation

* Fix typo in FCOS documentation

* Add fcos to the prototype builder

* Capitalize COCO_V1

* Fix params of fcos

* fix bug for partial

* Fixing docs indentation

* Fixing docs format in giou_loss

* Adopt Reference for GIoU Loss

* Rename giou_loss to generalized_box_iou_loss

* remove overwrite_eps

* Update AP test values

* Minor fixes for the docs

* Minor fixes for the docs

* Update torchvision/models/detection/fcos.py
Co-authored-by: Zhiqiang Wang <zhiqwang@foxmail.com>

* Update torchvision/prototype/models/detection/fcos.py
Co-authored-by: Zhiqiang Wang <zhiqwang@foxmail.com>
Co-authored-by: zhiqiang <zhiqwang@foxmail.com>
Co-authored-by: Joao Gomes <jdsgomes@fb.com>
Co-authored-by: Vasilis Vryniotis <datumbox@users.noreply.github.com>
Co-authored-by: Joao Gomes <joaopsgomes@gmail.com>
parent fe65d379
@@ -597,6 +597,7 @@ The models subpackage contains definitions for the following model
architectures for detection:
- `Faster R-CNN <https://arxiv.org/abs/1506.01497>`_
- `FCOS <https://arxiv.org/abs/1904.01355>`_
- `Mask R-CNN <https://arxiv.org/abs/1703.06870>`_
- `RetinaNet <https://arxiv.org/abs/1708.02002>`_
- `SSD <https://arxiv.org/abs/1512.02325>`_
@@ -642,6 +643,7 @@ Network box AP mask AP keypoint AP
Faster R-CNN ResNet-50 FPN 37.0 - -
Faster R-CNN MobileNetV3-Large FPN 32.8 - -
Faster R-CNN MobileNetV3-Large 320 FPN 22.8 - -
FCOS ResNet-50 FPN 39.2 - -
RetinaNet ResNet-50 FPN 36.4 - -
SSD300 VGG16 25.1 - -
SSDlite320 MobileNetV3-Large 21.3 - -
@@ -702,6 +704,7 @@ Network train time (s / it) test time (s / it)
Faster R-CNN ResNet-50 FPN 0.2288 0.0590 5.2
Faster R-CNN MobileNetV3-Large FPN 0.1020 0.0415 1.0
Faster R-CNN MobileNetV3-Large 320 FPN 0.0978 0.0376 0.6
FCOS ResNet-50 FPN 0.1450 0.0539 3.3
RetinaNet ResNet-50 FPN 0.2514 0.0939 4.1
SSD300 VGG16 0.2093 0.0744 1.5
SSDlite320 MobileNetV3-Large 0.1773 0.0906 1.5
@@ -721,6 +724,15 @@ Faster R-CNN
torchvision.models.detection.fasterrcnn_mobilenet_v3_large_fpn
torchvision.models.detection.fasterrcnn_mobilenet_v3_large_320_fpn
FCOS
----
.. autosummary::
:toctree: generated/
:template: function.rst
torchvision.models.detection.fcos_resnet50_fpn
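For orientation, here is a minimal inference sketch for the new entry point (an illustrative example, not part of this diff, assuming the stable ``pretrained=True`` interface the release ships with):

```python
import torch
import torchvision

# Build FCOS with a ResNet-50 FPN backbone, pretrained on COCO.
model = torchvision.models.detection.fcos_resnet50_fpn(pretrained=True)
model.eval()

# Detection models take a list of 3xHxW float tensors scaled to [0, 1].
images = [torch.rand(3, 480, 640)]
with torch.no_grad():
    predictions = model(images)

# Each prediction is a dict of 'boxes' (Nx4), 'labels' (N) and 'scores' (N).
print(predictions[0]["boxes"].shape, predictions[0]["scores"].shape)
```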
RetinaNet
---------
...
@@ -70,6 +70,10 @@ ignore_errors = True
ignore_errors = True
[mypy-torchvision.models.detection.fcos]
ignore_errors = True
[mypy-torchvision.ops.*]
ignore_errors = True
...
@@ -41,6 +41,13 @@ torchrun --nproc_per_node=8 train.py\
--lr-steps 16 22 --aspect-ratio-group-factor 3
```
### FCOS ResNet-50 FPN
```
torchrun --nproc_per_node=8 train.py\
--dataset coco --model fcos_resnet50_fpn --epochs 26\
--lr-steps 16 22 --aspect-ratio-group-factor 3 --lr 0.01 --amp
```
### RetinaNet
```
torchrun --nproc_per_node=8 train.py\
...
File suppressed by a .gitattributes entry or the file's encoding is unsupported.
@@ -218,6 +218,7 @@ script_model_unwrapper = {
"retinanet_resnet50_fpn": lambda x: x[1],
"ssd300_vgg16": lambda x: x[1],
"ssdlite320_mobilenet_v3_large": lambda x: x[1],
"fcos_resnet50_fpn": lambda x: x[1],
}
@@ -274,6 +275,13 @@ _model_params = {
"max_size": 224,
"input_shape": (3, 224, 224),
},
"fcos_resnet50_fpn": {
"num_classes": 2,
"score_thresh": 0.05,
"min_size": 224,
"max_size": 224,
"input_shape": (3, 224, 224),
},
"maskrcnn_resnet50_fpn": { "maskrcnn_resnet50_fpn": {
"num_classes": 10, "num_classes": 10,
"min_size": 224, "min_size": 224,
...@@ -325,6 +333,10 @@ _model_tests_values = { ...@@ -325,6 +333,10 @@ _model_tests_values = {
"max_trainable": 6, "max_trainable": 6,
"n_trn_params_per_layer": [96, 99, 138, 200, 239, 257, 266], "n_trn_params_per_layer": [96, 99, 138, 200, 239, 257, 266],
}, },
"fcos_resnet50_fpn": {
"max_trainable": 5,
"n_trn_params_per_layer": [54, 64, 83, 96, 106, 107],
},
}
...
@@ -22,6 +22,19 @@ class TestModelsDetectionUtils:
assert neg[0].sum() == 3
assert neg[0][0:6].sum() == 3
def test_box_linear_coder(self):
box_coder = _utils.BoxLinearCoder(normalize_by_size=True)
# Generate a random 10x4 boxes tensor, with coordinates < 50.
boxes = torch.rand(10, 4) * 50
boxes.clamp_(min=1.0) # tiny boxes cause numerical instability in box regression
boxes[:, 2:] += boxes[:, :2]
proposals = torch.tensor([0, 0, 101, 101] * 10).reshape(10, 4).float()
rel_codes = box_coder.encode_single(boxes, proposals)
pred_boxes = box_coder.decode_single(rel_codes, boxes)
assert torch.allclose(proposals, pred_boxes)
@pytest.mark.parametrize("train_layers, exp_froz_params", [(0, 53), (1, 43), (2, 24), (3, 11), (4, 1), (5, 0)])
def test_resnet_fpn_backbone_frozen_layers(self, train_layers, exp_froz_params):
# we know how many initial layers and parameters of the network should
...
@@ -4,3 +4,4 @@ from .keypoint_rcnn import *
from .retinanet import *
from .ssd import *
from .ssdlite import *
from .fcos import *
@@ -217,6 +217,83 @@ class BoxCoder:
return pred_boxes
class BoxLinearCoder:
"""
The linear box-to-box transform defined in FCOS. The transformation is
parameterized by the distances from the center of the (square) source box to
the four edges of the target box.
"""
def __init__(self, normalize_by_size: bool = True) -> None:
"""
Args:
normalize_by_size (bool): normalize deltas by the size of src (anchor) boxes.
"""
self.normalize_by_size = normalize_by_size
def encode_single(self, reference_boxes: Tensor, proposals: Tensor) -> Tensor:
"""
Encode a set of proposals with respect to some reference boxes
Args:
reference_boxes (Tensor): reference boxes
proposals (Tensor): boxes to be encoded
Returns:
Tensor: the encoded relative box offsets that can be used to
decode the boxes.
"""
# get the center of reference_boxes
reference_boxes_ctr_x = 0.5 * (reference_boxes[:, 0] + reference_boxes[:, 2])
reference_boxes_ctr_y = 0.5 * (reference_boxes[:, 1] + reference_boxes[:, 3])
# get box regression transformation deltas
target_l = reference_boxes_ctr_x - proposals[:, 0]
target_t = reference_boxes_ctr_y - proposals[:, 1]
target_r = proposals[:, 2] - reference_boxes_ctr_x
target_b = proposals[:, 3] - reference_boxes_ctr_y
targets = torch.stack((target_l, target_t, target_r, target_b), dim=1)
if self.normalize_by_size:
reference_boxes_w = reference_boxes[:, 2] - reference_boxes[:, 0]
reference_boxes_h = reference_boxes[:, 3] - reference_boxes[:, 1]
reference_boxes_size = torch.stack(
(reference_boxes_w, reference_boxes_h, reference_boxes_w, reference_boxes_h), dim=1
)
targets = targets / reference_boxes_size
return targets
def decode_single(self, rel_codes: Tensor, boxes: Tensor) -> Tensor:
"""
From a set of original boxes and encoded relative box offsets,
get the decoded boxes.
Args:
rel_codes (Tensor): encoded boxes
boxes (Tensor): reference boxes.
Returns:
Tensor: the predicted boxes obtained by applying the encoded relative offsets to the reference boxes.
"""
boxes = boxes.to(rel_codes.dtype)
ctr_x = 0.5 * (boxes[:, 0] + boxes[:, 2])
ctr_y = 0.5 * (boxes[:, 1] + boxes[:, 3])
if self.normalize_by_size:
boxes_w = boxes[:, 2] - boxes[:, 0]
boxes_h = boxes[:, 3] - boxes[:, 1]
boxes_size = torch.stack((boxes_w, boxes_h, boxes_w, boxes_h), dim=1)
rel_codes = rel_codes * boxes_size
pred_boxes1 = ctr_x - rel_codes[:, 0]
pred_boxes2 = ctr_y - rel_codes[:, 1]
pred_boxes3 = ctr_x + rel_codes[:, 2]
pred_boxes4 = ctr_y + rel_codes[:, 3]
pred_boxes = torch.stack((pred_boxes1, pred_boxes2, pred_boxes3, pred_boxes4), dim=1)
return pred_boxes
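A small round-trip sketch of the coder (not part of the diff; box values are made up for illustration): deltas encoded against a set of reference boxes should decode back to the original targets when the same references are used.

```python
import torch
from torchvision.models.detection._utils import BoxLinearCoder

coder = BoxLinearCoder(normalize_by_size=True)

# Reference (anchor) boxes and target boxes in (x1, y1, x2, y2) format.
anchors = torch.tensor([[0.0, 0.0, 100.0, 100.0],
                        [50.0, 50.0, 150.0, 150.0]])
targets = torch.tensor([[10.0, 20.0, 90.0, 80.0],
                        [60.0, 55.0, 140.0, 160.0]])

# Deltas are (l, t, r, b) distances from each anchor center to the target
# edges, normalized by the anchor size.
rel_codes = coder.encode_single(anchors, targets)
decoded = coder.decode_single(rel_codes, anchors)
assert torch.allclose(decoded, targets, atol=1e-4)
```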
class Matcher:
"""
This class assigns to each predicted "element" (e.g., a box) a ground-truth
...
This diff is collapsed.
@@ -13,6 +13,7 @@ from .boxes import box_convert
from .deform_conv import deform_conv2d, DeformConv2d
from .feature_pyramid_network import FeaturePyramidNetwork
from .focal_loss import sigmoid_focal_loss
from .generalized_box_iou_loss import generalized_box_iou_loss
from .misc import FrozenBatchNorm2d, ConvNormActivation, SqueezeExcitation
from .poolers import MultiScaleRoIAlign
from .ps_roi_align import ps_roi_align, PSRoIAlign
@@ -52,4 +53,5 @@ __all__ = [
"FrozenBatchNorm2d",
"ConvNormActivation",
"SqueezeExcitation",
"generalized_box_iou_loss",
]
import torch
def generalized_box_iou_loss(
boxes1: torch.Tensor,
boxes2: torch.Tensor,
reduction: str = "none",
eps: float = 1e-7,
) -> torch.Tensor:
"""
Original implementation from
https://github.com/facebookresearch/fvcore/blob/bfff2ef/fvcore/nn/giou_loss.py
Gradient-friendly IoU loss with an additional penalty that is non-zero when the
boxes do not overlap and scales with the size of their smallest enclosing box.
This loss is symmetric, so the boxes1 and boxes2 arguments are interchangeable.
Both sets of boxes are expected to be in ``(x1, y1, x2, y2)`` format with
``0 <= x1 < x2`` and ``0 <= y1 < y2``, and the two sets of boxes must have
the same shape.
Args:
boxes1 (Tensor[N, 4] or Tensor[4]): first set of boxes
boxes2 (Tensor[N, 4] or Tensor[4]): second set of boxes
reduction (string, optional): Specifies the reduction to apply to the output:
``'none'`` | ``'mean'`` | ``'sum'``. ``'none'``: No reduction will be
applied to the output. ``'mean'``: The output will be averaged.
``'sum'``: The output will be summed. Default: ``'none'``
eps (float, optional): small number to prevent division by zero. Default: 1e-7
Reference:
Hamid Rezatofighi et. al: Generalized Intersection over Union:
A Metric and A Loss for Bounding Box Regression:
https://arxiv.org/abs/1902.09630
"""
x1, y1, x2, y2 = boxes1.unbind(dim=-1)
x1g, y1g, x2g, y2g = boxes2.unbind(dim=-1)
assert (x2 >= x1).all(), "bad box: x1 larger than x2"
assert (y2 >= y1).all(), "bad box: y1 larger than y2"
# Intersection keypoints
xkis1 = torch.max(x1, x1g)
ykis1 = torch.max(y1, y1g)
xkis2 = torch.min(x2, x2g)
ykis2 = torch.min(y2, y2g)
intsctk = torch.zeros_like(x1)
mask = (ykis2 > ykis1) & (xkis2 > xkis1)
intsctk[mask] = (xkis2[mask] - xkis1[mask]) * (ykis2[mask] - ykis1[mask])
unionk = (x2 - x1) * (y2 - y1) + (x2g - x1g) * (y2g - y1g) - intsctk
iouk = intsctk / (unionk + eps)
# smallest enclosing box
xc1 = torch.min(x1, x1g)
yc1 = torch.min(y1, y1g)
xc2 = torch.max(x2, x2g)
yc2 = torch.max(y2, y2g)
area_c = (xc2 - xc1) * (yc2 - yc1)
miouk = iouk - ((area_c - unionk) / (area_c + eps))
loss = 1 - miouk
if reduction == "mean":
loss = loss.mean() if loss.numel() > 0 else 0.0 * loss.sum()
elif reduction == "sum":
loss = loss.sum()
return loss
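A short usage sketch of the new loss (illustrative values only): identical boxes yield a loss of ~0, while disjoint boxes are additionally penalized by the enclosing-box term, so the loss lies in [0, 2).

```python
import torch
from torchvision.ops import generalized_box_iou_loss

a = torch.tensor([[0.0, 0.0, 10.0, 10.0]])
b = torch.tensor([[0.0, 2.0, 10.0, 12.0]])   # high overlap with a
c = torch.tensor([[20.0, 20.0, 30.0, 30.0]])  # disjoint from a

print(generalized_box_iou_loss(a, a))  # ~0.0: perfect match
print(generalized_box_iou_loss(a, b))  # ~0.33: IoU 2/3, no enclosing-box penalty
print(generalized_box_iou_loss(a, c))  # ~1.78: IoU 0 plus enclosing-box penalty
print(generalized_box_iou_loss(a, c, reduction="mean"))  # scalar reduction
```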
from .faster_rcnn import *
from .fcos import *
from .keypoint_rcnn import *
from .mask_rcnn import *
from .retinanet import *
...
from typing import Any, Optional
from torchvision.prototype.transforms import CocoEval
from torchvision.transforms.functional import InterpolationMode
from ....models.detection.fcos import (
_resnet_fpn_extractor,
_validate_trainable_layers,
FCOS,
LastLevelP6P7,
misc_nn_ops,
)
from .._api import WeightsEnum, Weights
from .._meta import _COCO_CATEGORIES
from .._utils import handle_legacy_interface, _ovewrite_value_param
from ..resnet import ResNet50_Weights, resnet50
__all__ = [
"FCOS",
"FCOS_ResNet50_FPN_Weights",
"fcos_resnet50_fpn",
]
class FCOS_ResNet50_FPN_Weights(WeightsEnum):
COCO_V1 = Weights(
url="https://download.pytorch.org/models/fcos_resnet50_fpn_coco-99b0c9b7.pth",
transforms=CocoEval,
meta={
"task": "image_object_detection",
"architecture": "FCOS",
"publication_year": 2019,
"num_params": 32269600,
"categories": _COCO_CATEGORIES,
"interpolation": InterpolationMode.BILINEAR,
"recipe": "https://github.com/pytorch/vision/tree/main/references/detection#fcos-resnet-50-fpn",
"map": 39.2,
},
)
default = COCO_V1
@handle_legacy_interface(
weights=("pretrained", FCOS_ResNet50_FPN_Weights.COCO_V1),
weights_backbone=("pretrained_backbone", ResNet50_Weights.ImageNet1K_V1),
)
def fcos_resnet50_fpn(
*,
weights: Optional[FCOS_ResNet50_FPN_Weights] = None,
progress: bool = True,
num_classes: Optional[int] = None,
weights_backbone: Optional[ResNet50_Weights] = None,
trainable_backbone_layers: Optional[int] = None,
**kwargs: Any,
) -> FCOS:
weights = FCOS_ResNet50_FPN_Weights.verify(weights)
weights_backbone = ResNet50_Weights.verify(weights_backbone)
if weights is not None:
weights_backbone = None
num_classes = _ovewrite_value_param(num_classes, len(weights.meta["categories"]))
elif num_classes is None:
num_classes = 91
trainable_backbone_layers = _validate_trainable_layers(
weights is not None or weights_backbone is not None, trainable_backbone_layers, 5, 3
)
backbone = resnet50(weights=weights_backbone, progress=progress, norm_layer=misc_nn_ops.FrozenBatchNorm2d)
backbone = _resnet_fpn_extractor(
backbone, trainable_backbone_layers, returned_layers=[2, 3, 4], extra_blocks=LastLevelP6P7(256, 256)
)
model = FCOS(backbone, num_classes, **kwargs)
if weights is not None:
model.load_state_dict(weights.get_state_dict(progress=progress))
return model
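And a hedged sketch of the prototype weights API exercised by this builder (it mirrors the code above; the prototype interface is subject to change):

```python
import torch
from torchvision.prototype.models.detection import (
    FCOS_ResNet50_FPN_Weights,
    fcos_resnet50_fpn,
)

# Select the COCO checkpoint explicitly instead of passing pretrained=True.
weights = FCOS_ResNet50_FPN_Weights.COCO_V1
model = fcos_resnet50_fpn(weights=weights)
model.eval()

# The weights entry carries its own inference transforms and metadata.
preprocess = weights.transforms()
print(weights.meta["map"])  # 39.2 box AP on COCO val2017
```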