"git@developer.sourcefind.cn:change/sglang.git" did not exist on "0e7b353009ae4e12d7e86e266a9f237be1f7d732"
Commit 828e457d authored by Tai-Wang's avatar Tai-Wang
Browse files

[Feature] FCOS3D BBox Coder (#940)

* FCOS3D BBox Coder

* Add unit tests

* Change the value from long to float/double

* Rename bbox_out as bbox

* Add comments to forward returns
parent 82a0c215
...@@ -55,6 +55,7 @@ model = dict( ...@@ -55,6 +55,7 @@ model = dict(
type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0),
loss_centerness=dict( loss_centerness=dict(
type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0), type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0),
bbox_coder=dict(type='FCOS3DBBoxCoder', code_size=9),
norm_on_bbox=True, norm_on_bbox=True,
centerness_on_reg=True, centerness_on_reg=True,
center_sampling=True, center_sampling=True,
......
...@@ -3,6 +3,7 @@ from mmdet.core.bbox import build_bbox_coder ...@@ -3,6 +3,7 @@ from mmdet.core.bbox import build_bbox_coder
from .anchor_free_bbox_coder import AnchorFreeBBoxCoder from .anchor_free_bbox_coder import AnchorFreeBBoxCoder
from .centerpoint_bbox_coders import CenterPointBBoxCoder from .centerpoint_bbox_coders import CenterPointBBoxCoder
from .delta_xyzwhlr_bbox_coder import DeltaXYZWLHRBBoxCoder from .delta_xyzwhlr_bbox_coder import DeltaXYZWLHRBBoxCoder
from .fcos3d_bbox_coder import FCOS3DBBoxCoder
from .groupfree3d_bbox_coder import GroupFree3DBBoxCoder from .groupfree3d_bbox_coder import GroupFree3DBBoxCoder
from .partial_bin_based_bbox_coder import PartialBinBasedBBoxCoder from .partial_bin_based_bbox_coder import PartialBinBasedBBoxCoder
from .point_xyzwhlr_bbox_coder import PointXYZWHLRBBoxCoder from .point_xyzwhlr_bbox_coder import PointXYZWHLRBBoxCoder
...@@ -10,5 +11,5 @@ from .point_xyzwhlr_bbox_coder import PointXYZWHLRBBoxCoder ...@@ -10,5 +11,5 @@ from .point_xyzwhlr_bbox_coder import PointXYZWHLRBBoxCoder
__all__ = [ __all__ = [
'build_bbox_coder', 'DeltaXYZWLHRBBoxCoder', 'PartialBinBasedBBoxCoder', 'build_bbox_coder', 'DeltaXYZWLHRBBoxCoder', 'PartialBinBasedBBoxCoder',
'CenterPointBBoxCoder', 'AnchorFreeBBoxCoder', 'GroupFree3DBBoxCoder', 'CenterPointBBoxCoder', 'AnchorFreeBBoxCoder', 'GroupFree3DBBoxCoder',
'PointXYZWHLRBBoxCoder' 'PointXYZWHLRBBoxCoder', 'FCOS3DBBoxCoder'
] ]
import numpy as np
import torch
from mmdet.core.bbox import BaseBBoxCoder
from mmdet.core.bbox.builder import BBOX_CODERS
from ..structures import limit_period
@BBOX_CODERS.register_module()
@BBOX_CODERS.register_module()
class FCOS3DBBoxCoder(BaseBBoxCoder):
    """Bounding box coder for FCOS3D.

    Args:
        base_depths (tuple[tuple[float]]): Depth references for decode box
            depth, as ``(mean, std)`` pairs — either a single shared pair or
            one pair per class. Defaults to None, in which case depth is
            decoded with ``exp()``.
        base_dims (tuple[tuple[float]]): Dimension references for decode box
            dimension, one prior size per class. Defaults to None.
        code_size (int): The dimension of boxes to be encoded. Defaults to 7.
        norm_on_bbox (bool): Whether to apply normalization on the bounding
            box 2D attributes. Defaults to True.
    """

    def __init__(self,
                 base_depths=None,
                 base_dims=None,
                 code_size=7,
                 norm_on_bbox=True):
        super(FCOS3DBBoxCoder, self).__init__()
        self.base_depths = base_depths
        self.base_dims = base_dims
        # Number of regression targets per box (e.g. 7 or 9).
        self.bbox_code_size = code_size
        self.norm_on_bbox = norm_on_bbox

    def encode(self, gt_bboxes_3d, gt_labels_3d, gt_bboxes, gt_labels):
        """Encode ground truth into regression targets (not implemented)."""
        # TODO: refactor the encoder in the FCOS3D and PGD head
        pass

    def decode(self, bbox, scale, stride, training, cls_score=None):
        """Decode regressed results into 3D predictions.

        Note that offsets are not transformed to the projected 3D centers.

        Args:
            bbox (torch.Tensor): Raw bounding box predictions in shape
                [N, C, H, W].
            scale (tuple[`Scale`]): Learnable scale parameters.
            stride (tuple[int]): Stride for a specific feature level.
            training (bool): Whether the decoding is in the training
                procedure.
            cls_score (torch.Tensor): Classification score map for deciding
                which base depth or dim is used. Defaults to None.

        Returns:
            torch.Tensor: Decoded boxes.
        """
        # scale the bbox of different level
        # only apply to offset, depth and size prediction
        scale_offset, scale_depth, scale_size = scale[0:3]

        # Clone first so every slice is scaled from the raw prediction;
        # the assignments below write into ``bbox`` in place.
        clone_bbox = bbox.clone()
        bbox[:, :2] = scale_offset(clone_bbox[:, :2]).float()
        bbox[:, 2] = scale_depth(clone_bbox[:, 2]).float()
        bbox[:, 3:6] = scale_size(clone_bbox[:, 3:6]).float()

        if self.base_depths is None:
            # No depth prior: the network regresses log-depth.
            bbox[:, 2] = bbox[:, 2].exp()
        elif len(self.base_depths) == 1:  # only single prior
            mean = self.base_depths[0][0]
            std = self.base_depths[0][1]
            bbox[:, 2] = mean + bbox.clone()[:, 2] * std
        else:  # multi-class priors
            assert len(self.base_depths) == cls_score.shape[1], \
                'The number of multi-class depth priors should be equal to ' \
                'the number of categories.'
            # Use the (mean, std) prior of the highest-scoring class at
            # each spatial location.
            indices = cls_score.max(dim=1)[1]
            depth_priors = cls_score.new_tensor(
                self.base_depths)[indices, :].permute(0, 3, 1, 2)
            mean = depth_priors[:, 0]
            std = depth_priors[:, 1]
            bbox[:, 2] = mean + bbox.clone()[:, 2] * std

        bbox[:, 3:6] = bbox[:, 3:6].exp()
        if self.base_dims is not None:
            # NOTE(review): this branch dereferences ``cls_score``; calling
            # decode with base_dims set and cls_score=None will raise —
            # confirm callers always pass a score map in that case.
            assert len(self.base_dims) == cls_score.shape[1], \
                'The number of anchor sizes should be equal to the number ' \
                'of categories.'
            indices = cls_score.max(dim=1)[1]
            size_priors = cls_score.new_tensor(
                self.base_dims)[indices, :].permute(0, 3, 1, 2)
            bbox[:, 3:6] = size_priors * bbox.clone()[:, 3:6]

        assert self.norm_on_bbox is True, 'Setting norm_on_bbox to False '\
            'has not been thoroughly tested for FCOS3D.'
        if self.norm_on_bbox:
            if not training:
                # Note that this line is conducted only when testing
                bbox[:, :2] *= stride

        return bbox

    @staticmethod
    def decode_yaw(bbox, centers2d, dir_cls, dir_offset, cam2img):
        """Decode yaw angle and change it from local to global.

        Args:
            bbox (torch.Tensor): Bounding box predictions in shape
                [N, C] with yaws to be decoded.
            centers2d (torch.Tensor): Projected 3D-center on the image planes
                corresponding to the box predictions.
            dir_cls (torch.Tensor): Predicted direction classes.
            dir_offset (float): Direction offset before dividing all the
                directions into several classes.
            cam2img (torch.Tensor): Camera intrinsic matrix in shape [4, 4].

        Returns:
            torch.Tensor: Bounding boxes with decoded yaws.
        """
        if bbox.shape[0] > 0:
            # Fold the regressed yaw into [0, pi) and restore the half-turn
            # chosen by the direction classifier.
            dir_rot = limit_period(bbox[..., 6] - dir_offset, 0, np.pi)
            bbox[..., 6] = \
                dir_rot + dir_offset + np.pi * dir_cls.to(bbox.dtype)

        # Convert local (observation) yaw to global yaw by adding the ray
        # angle derived from the projected 2D center and the intrinsics.
        bbox[:, 6] = torch.atan2(centers2d[:, 0] - cam2img[0, 2],
                                 cam2img[0, 0]) + bbox[:, 6]

        return bbox
...@@ -9,6 +9,7 @@ from torch import nn as nn ...@@ -9,6 +9,7 @@ from torch import nn as nn
from mmdet3d.core import (box3d_multiclass_nms, limit_period, points_img2cam, from mmdet3d.core import (box3d_multiclass_nms, limit_period, points_img2cam,
xywhr2xyxyr) xywhr2xyxyr)
from mmdet.core import multi_apply from mmdet.core import multi_apply
from mmdet.core.bbox.builder import build_bbox_coder
from mmdet.models.builder import HEADS, build_loss from mmdet.models.builder import HEADS, build_loss
from .anchor_free_mono3d_head import AnchorFreeMono3DHead from .anchor_free_mono3d_head import AnchorFreeMono3DHead
...@@ -75,6 +76,7 @@ class FCOSMono3DHead(AnchorFreeMono3DHead): ...@@ -75,6 +76,7 @@ class FCOSMono3DHead(AnchorFreeMono3DHead):
type='CrossEntropyLoss', type='CrossEntropyLoss',
use_sigmoid=True, use_sigmoid=True,
loss_weight=1.0), loss_weight=1.0),
bbox_coder=dict(type='FCOS3DBBoxCoder', code_size=9),
norm_cfg=dict(type='GN', num_groups=32, requires_grad=True), norm_cfg=dict(type='GN', num_groups=32, requires_grad=True),
centerness_branch=(64, ), centerness_branch=(64, ),
init_cfg=None, init_cfg=None,
...@@ -97,6 +99,8 @@ class FCOSMono3DHead(AnchorFreeMono3DHead): ...@@ -97,6 +99,8 @@ class FCOSMono3DHead(AnchorFreeMono3DHead):
init_cfg=init_cfg, init_cfg=init_cfg,
**kwargs) **kwargs)
self.loss_centerness = build_loss(loss_centerness) self.loss_centerness = build_loss(loss_centerness)
bbox_coder['code_size'] = self.bbox_code_size
self.bbox_coder = build_bbox_coder(bbox_coder)
if init_cfg is None: if init_cfg is None:
self.init_cfg = dict( self.init_cfg = dict(
type='Normal', type='Normal',
...@@ -112,9 +116,11 @@ class FCOSMono3DHead(AnchorFreeMono3DHead): ...@@ -112,9 +116,11 @@ class FCOSMono3DHead(AnchorFreeMono3DHead):
conv_channels=self.centerness_branch, conv_channels=self.centerness_branch,
conv_strides=(1, ) * len(self.centerness_branch)) conv_strides=(1, ) * len(self.centerness_branch))
self.conv_centerness = nn.Conv2d(self.centerness_branch[-1], 1, 1) self.conv_centerness = nn.Conv2d(self.centerness_branch[-1], 1, 1)
self.scale_dim = 3 # only for offset, depth and size regression
self.scales = nn.ModuleList([ self.scales = nn.ModuleList([
nn.ModuleList([Scale(1.0) for _ in range(3)]) for _ in self.strides nn.ModuleList([Scale(1.0) for _ in range(self.scale_dim)])
]) # only for offset, depth and size regression for _ in self.strides
])
def forward(self, feats): def forward(self, feats):
"""Forward features from the upstream network. """Forward features from the upstream network.
...@@ -140,8 +146,9 @@ class FCOSMono3DHead(AnchorFreeMono3DHead): ...@@ -140,8 +146,9 @@ class FCOSMono3DHead(AnchorFreeMono3DHead):
centernesses (list[Tensor]): Centerness for each scale level, centernesses (list[Tensor]): Centerness for each scale level,
each is a 4D-tensor, the channel number is num_points * 1. each is a 4D-tensor, the channel number is num_points * 1.
""" """
# Note: we use [:5] to filter feats and only return predictions
return multi_apply(self.forward_single, feats, self.scales, return multi_apply(self.forward_single, feats, self.scales,
self.strides) self.strides)[:5]
def forward_single(self, x, scale, stride): def forward_single(self, x, scale, stride):
"""Forward features of a single scale levle. """Forward features of a single scale levle.
...@@ -171,26 +178,12 @@ class FCOSMono3DHead(AnchorFreeMono3DHead): ...@@ -171,26 +178,12 @@ class FCOSMono3DHead(AnchorFreeMono3DHead):
for conv_centerness_prev_layer in self.conv_centerness_prev: for conv_centerness_prev_layer in self.conv_centerness_prev:
clone_cls_feat = conv_centerness_prev_layer(clone_cls_feat) clone_cls_feat = conv_centerness_prev_layer(clone_cls_feat)
centerness = self.conv_centerness(clone_cls_feat) centerness = self.conv_centerness(clone_cls_feat)
# scale the bbox_pred of different level
# only apply to offset, depth and size prediction
scale_offset, scale_depth, scale_size = scale[0:3]
clone_bbox_pred = bbox_pred.clone() bbox_pred = self.bbox_coder.decode(bbox_pred, scale, stride,
bbox_pred[:, :2] = scale_offset(clone_bbox_pred[:, :2]).float() self.training, cls_score)
bbox_pred[:, 2] = scale_depth(clone_bbox_pred[:, 2]).float()
bbox_pred[:, 3:6] = scale_size(clone_bbox_pred[:, 3:6]).float()
bbox_pred[:, 2] = bbox_pred[:, 2].exp() return cls_score, bbox_pred, dir_cls_pred, attr_pred, centerness, \
bbox_pred[:, 3:6] = bbox_pred[:, 3:6].exp() + 1e-6 # avoid size=0 cls_feat, reg_feat
assert self.norm_on_bbox is True, 'Setting norm_on_bbox to False '\
'has not been thoroughly tested for FCOS3D.'
if self.norm_on_bbox:
if not self.training:
# Note that this line is conducted only when testing
bbox_pred[:, :2] *= stride
return cls_score, bbox_pred, dir_cls_pred, attr_pred, centerness
@staticmethod @staticmethod
def add_sin_difference(boxes1, boxes2): def add_sin_difference(boxes1, boxes2):
...@@ -654,19 +647,13 @@ class FCOSMono3DHead(AnchorFreeMono3DHead): ...@@ -654,19 +647,13 @@ class FCOSMono3DHead(AnchorFreeMono3DHead):
mlvl_dir_scores = torch.cat(mlvl_dir_scores) mlvl_dir_scores = torch.cat(mlvl_dir_scores)
# change local yaw to global yaw for 3D nms # change local yaw to global yaw for 3D nms
if mlvl_bboxes.shape[0] > 0: cam2img = mlvl_centers2d.new_zeros((4, 4))
dir_rot = limit_period(mlvl_bboxes[..., 6] - self.dir_offset, 0, cam2img[:view.shape[0], :view.shape[1]] = \
np.pi)
mlvl_bboxes[..., 6] = (
dir_rot + self.dir_offset +
np.pi * mlvl_dir_scores.to(mlvl_bboxes.dtype))
cam_intrinsic = mlvl_centers2d.new_zeros((4, 4))
cam_intrinsic[:view.shape[0], :view.shape[1]] = \
mlvl_centers2d.new_tensor(view) mlvl_centers2d.new_tensor(view)
mlvl_bboxes[:, 6] = torch.atan2( mlvl_bboxes = self.bbox_coder.decode_yaw(mlvl_bboxes, mlvl_centers2d,
mlvl_centers2d[:, 0] - cam_intrinsic[0, 2], mlvl_dir_scores,
cam_intrinsic[0, 0]) + mlvl_bboxes[:, 6] self.dir_offset, cam2img)
mlvl_bboxes_for_nms = xywhr2xyxyr(input_meta['box_type_3d']( mlvl_bboxes_for_nms = xywhr2xyxyr(input_meta['box_type_3d'](
mlvl_bboxes, box_dim=self.bbox_code_size, mlvl_bboxes, box_dim=self.bbox_code_size,
origin=(0.5, 0.5, 0.5)).bev) origin=(0.5, 0.5, 0.5)).bev)
......
# Copyright (c) OpenMMLab. All rights reserved. # Copyright (c) OpenMMLab. All rights reserved.
import torch import torch
from mmcv.cnn import Scale
from torch import nn as nn
from mmdet3d.core.bbox import DepthInstance3DBoxes, LiDARInstance3DBoxes from mmdet3d.core.bbox import DepthInstance3DBoxes, LiDARInstance3DBoxes
from mmdet.core import build_bbox_coder from mmdet.core import build_bbox_coder
...@@ -382,3 +384,84 @@ def test_point_xyzwhlr_bbox_coder(): ...@@ -382,3 +384,84 @@ def test_point_xyzwhlr_bbox_coder():
# test decode # test decode
bbox3d_out = boxcoder.decode(bbox_target, points, gt_labels_3d) bbox3d_out = boxcoder.decode(bbox_target, points, gt_labels_3d)
assert torch.allclose(bbox3d_out, gt_bboxes_3d, atol=1e-4) assert torch.allclose(bbox3d_out, gt_bboxes_3d, atol=1e-4)
def test_fcos3d_bbox_coder():
    """Check FCOS3DBBoxCoder.decode with/without priors and decode_yaw."""
    # Shared raw predictions in shape [2, 7, 1, 1]. A fresh tensor is built
    # per decode call because decode mutates its input in place.
    raw_vals = [[[[0.3130]], [[0.7094]], [[0.8743]], [[0.0570]], [[0.5579]],
                 [[0.1593]], [[0.4553]]],
                [[[0.7758]], [[0.2298]], [[0.3925]], [[0.6307]], [[0.4377]],
                 [[0.3339]], [[0.1966]]]]

    # Case 1: no depth / dimension priors.
    plain_coder = build_bbox_coder(
        dict(
            type='FCOS3DBBoxCoder',
            base_depths=None,
            base_dims=None,
            code_size=7,
            norm_on_bbox=True))
    level_scales = nn.ModuleList([Scale(1.0) for _ in range(3)])
    decoded = plain_coder.decode(
        torch.tensor(raw_vals), level_scales, 2, False,
        torch.randn([2, 2, 1, 1]).sigmoid())
    expected = torch.tensor([[[[0.6261]], [[1.4188]], [[2.3971]], [[1.0586]],
                              [[1.7470]], [[1.1727]], [[0.4553]]],
                             [[[1.5516]], [[0.4596]], [[1.4806]], [[1.8790]],
                              [[1.5492]], [[1.3965]], [[0.1966]]]])
    assert torch.allclose(decoded, expected, atol=1e-3)

    # Case 2: per-class depth and dimension priors.
    prior_coder = build_bbox_coder(
        dict(
            type='FCOS3DBBoxCoder',
            base_depths=((28., 13.), (25., 12.)),
            base_dims=((2., 3., 1.), (1., 2., 3.)),
            code_size=7,
            norm_on_bbox=True))
    level_scales = nn.ModuleList([Scale(1.0) for _ in range(3)])
    cls_score = torch.tensor([[[[0.5811]], [[0.6198]]],
                              [[[0.4889]], [[0.8142]]]])
    decoded = prior_coder.decode(
        torch.tensor(raw_vals), level_scales, 2, False, cls_score)
    expected = torch.tensor([[[[0.6260]], [[1.4188]], [[35.4916]], [[1.0587]],
                              [[3.4940]], [[3.5181]], [[0.4553]]],
                             [[[1.5516]], [[0.4596]], [[29.7100]], [[1.8789]],
                              [[3.0983]], [[4.1892]], [[0.1966]]]])
    assert torch.allclose(decoded, expected, atol=1e-3)

    # Case 3: decode_yaw turns local yaws into global yaws.
    flat_boxes = decoded.permute(0, 2, 3, 1).view(-1, 7)
    centers2d = torch.tensor([[100., 150.], [200., 100.]])
    dir_cls = torch.tensor([0., 1.])
    cam2img = torch.tensor([[700., 0., 450., 0.], [0., 700., 200., 0.],
                            [0., 0., 1., 0.], [0., 0., 0., 1.]])
    flat_boxes = prior_coder.decode_yaw(flat_boxes, centers2d, dir_cls,
                                        0.7854, cam2img)
    expected = torch.tensor(
        [[0.6260, 1.4188, 35.4916, 1.0587, 3.4940, 3.5181, 3.1332],
         [1.5516, 0.4596, 29.7100, 1.8789, 3.0983, 4.1892, 6.1368]])
    assert torch.allclose(flat_boxes, expected, atol=1e-3)
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment