Commit cc5e8579 authored by Xi Liu's avatar Xi Liu Committed by Tai-Wang
Browse files

[Feature] PointXYZWHLRBBoxCoder (#856)

* support PointBasedBoxCoder

* fix unittest bug

* support unittest in gpu

* support unittest in gpu

* modified docstring

* add args

* add args
parent 53435c62
......@@ -5,8 +5,10 @@ from .centerpoint_bbox_coders import CenterPointBBoxCoder
from .delta_xyzwhlr_bbox_coder import DeltaXYZWLHRBBoxCoder
from .groupfree3d_bbox_coder import GroupFree3DBBoxCoder
from .partial_bin_based_bbox_coder import PartialBinBasedBBoxCoder
from .point_xyzwhlr_bbox_coder import PointXYZWHLRBBoxCoder
__all__ = [
'build_bbox_coder', 'DeltaXYZWLHRBBoxCoder', 'PartialBinBasedBBoxCoder',
'CenterPointBBoxCoder', 'AnchorFreeBBoxCoder', 'GroupFree3DBBoxCoder'
'CenterPointBBoxCoder', 'AnchorFreeBBoxCoder', 'GroupFree3DBBoxCoder',
'PointXYZWHLRBBoxCoder'
]
import numpy as np
import torch
from mmdet.core.bbox import BaseBBoxCoder
from mmdet.core.bbox.builder import BBOX_CODERS
@BBOX_CODERS.register_module()
class PointXYZWHLRBBoxCoder(BaseBBoxCoder):
    """Point based bbox coder for 3D boxes.

    Encodes ground-truth boxes as regression targets relative to
    foreground points (optionally normalized by class-wise mean anchor
    sizes) and decodes predicted targets back into box parameters.
    The yaw angle is encoded as ``(cos(r), sin(r))`` and decoded with
    ``atan2``, so the encoded representation has one extra channel.

    Args:
        code_size (int): The dimension of boxes to be encoded.
            Defaults to 7.
        use_mean_size (bool, optional): Whether using anchors based on class.
            Defaults to True.
        mean_size (list[list[float]], optional): Mean size of bboxes in
            each class. Defaults to None.
    """

    def __init__(self, code_size=7, use_mean_size=True, mean_size=None):
        super(PointXYZWHLRBBoxCoder, self).__init__()
        self.code_size = code_size
        self.use_mean_size = use_mean_size
        if self.use_mean_size:
            self.mean_size = torch.from_numpy(np.array(mean_size)).float()
            assert self.mean_size.min() > 0, \
                f'The min of mean_size should > 0, however currently it is ' \
                f'{self.mean_size.min()}, please check it in your config.'

    def encode(self, gt_bboxes_3d, points, gt_labels_3d=None):
        """Encode ground truth to prediction targets.

        Args:
            gt_bboxes_3d (torch.Tensor): Ground truth bboxes
                with shape (N, 7 + C).
            points (torch.Tensor): Point cloud with shape (N, 3).
            gt_labels_3d (torch.Tensor, optional): Ground truth classes.
                Required when ``use_mean_size`` is True. Defaults to None.

        Returns:
            torch.Tensor: Encoded boxes with shape (N, 8 + C).
        """
        # Work on a copy: clamping sizes in place would silently mutate
        # the caller's tensor.
        gt_bboxes_3d = gt_bboxes_3d.clone()
        # Guard against non-positive sizes before taking log below.
        gt_bboxes_3d[:, 3:6] = torch.clamp(gt_bboxes_3d[:, 3:6], min=1e-5)
        xg, yg, zg, dxg, dyg, dzg, rg, *cgs = torch.split(
            gt_bboxes_3d, 1, dim=-1)
        xa, ya, za = torch.split(points, 1, dim=-1)

        if self.use_mean_size:
            assert gt_labels_3d.max() <= self.mean_size.shape[0] - 1, \
                f'the max gt label {gt_labels_3d.max()} is bigger than ' \
                f'anchor types {self.mean_size.shape[0] - 1}.'
            self.mean_size = self.mean_size.to(gt_labels_3d.device)
            point_anchor_size = self.mean_size[gt_labels_3d]
            dxa, dya, dza = torch.split(point_anchor_size, 1, dim=-1)
            # Normalize xy offsets by the anchor's ground-plane diagonal.
            diagonal = torch.sqrt(dxa**2 + dya**2)
            xt = (xg - xa) / diagonal
            yt = (yg - ya) / diagonal
            zt = (zg - za) / dza
            dxt = torch.log(dxg / dxa)
            dyt = torch.log(dyg / dya)
            dzt = torch.log(dzg / dza)
        else:
            # No class anchors: raw offsets and log sizes.
            xt = (xg - xa)
            yt = (yg - ya)
            zt = (zg - za)
            dxt = torch.log(dxg)
            dyt = torch.log(dyg)
            dzt = torch.log(dzg)

        return torch.cat(
            [xt, yt, zt, dxt, dyt, dzt,
             torch.cos(rg),
             torch.sin(rg), *cgs],
            dim=-1)

    def decode(self, box_encodings, points, pred_labels_3d=None):
        """Decode predicted parts and points to bbox3d.

        Args:
            box_encodings (torch.Tensor): Encoded boxes with shape (N, 8 + C).
            points (torch.Tensor): Point cloud with shape (N, 3).
            pred_labels_3d (torch.Tensor, optional): Bbox predicted labels
                (N, M). Required when ``use_mean_size`` is True.
                Defaults to None.

        Returns:
            torch.Tensor: Decoded boxes with shape (N, 7 + C).
        """
        xt, yt, zt, dxt, dyt, dzt, cost, sint, *cts = torch.split(
            box_encodings, 1, dim=-1)
        xa, ya, za = torch.split(points, 1, dim=-1)

        if self.use_mean_size:
            assert pred_labels_3d.max() <= self.mean_size.shape[0] - 1, \
                f'The max pred label {pred_labels_3d.max()} is bigger than ' \
                f'anchor types {self.mean_size.shape[0] - 1}.'
            self.mean_size = self.mean_size.to(pred_labels_3d.device)
            point_anchor_size = self.mean_size[pred_labels_3d]
            dxa, dya, dza = torch.split(point_anchor_size, 1, dim=-1)
            diagonal = torch.sqrt(dxa**2 + dya**2)
            xg = xt * diagonal + xa
            yg = yt * diagonal + ya
            zg = zt * dza + za
            dxg = torch.exp(dxt) * dxa
            dyg = torch.exp(dyt) * dya
            dzg = torch.exp(dzt) * dza
        else:
            xg = xt + xa
            yg = yt + ya
            zg = zt + za
            dxg, dyg, dzg = torch.split(
                torch.exp(box_encodings[..., 3:6]), 1, dim=-1)

        # Recover yaw from its (cos, sin) encoding.
        rg = torch.atan2(sint, cost)
        return torch.cat([xg, yg, zg, dxg, dyg, dzg, rg, *cts], dim=-1)
......@@ -352,3 +352,33 @@ def test_centerpoint_bbox_coder():
assert temp[i]['bboxes'].shape == torch.Size([500, 9])
assert temp[i]['scores'].shape == torch.Size([500])
assert temp[i]['labels'].shape == torch.Size([500])
def test_point_xyzwhlr_bbox_coder():
    """Check encode targets and the encode/decode round trip."""
    coder = build_bbox_coder(
        dict(
            type='PointXYZWHLRBBoxCoder',
            use_mean_size=True,
            mean_size=[[3.9, 1.6, 1.56], [0.8, 0.6, 1.73],
                       [1.76, 0.6, 1.73]]))

    # encode: gt boxes + foreground points + class labels -> targets
    boxes = torch.tensor(
        [[13.3329, 2.3514, -0.7004, 1.7508, 0.4702, 1.7909, -3.0522],
         [2.2068, -2.6994, -0.3277, 3.8703, 1.6602, 1.6913, -1.9057],
         [5.5269, 2.5085, -1.0129, 1.1496, 0.8006, 1.8887, 2.1756]])
    pts = torch.tensor([[13.70, 2.40, 0.12], [3.20, -3.00, 0.2],
                        [5.70, 2.20, -0.4]])
    labels = torch.tensor([2, 0, 1])
    targets = coder.encode(boxes, pts, labels)
    expected = torch.tensor(
        [[-0.1974, -0.0261, -0.4742, -0.0052, -0.2438, 0.0346, -0.9960,
          -0.0893],
         [-0.2356, 0.0713, -0.3383, -0.0076, 0.0369, 0.0808, -0.3287,
          -0.9444],
         [-0.1731, 0.3085, -0.3543, 0.3626, 0.2884, 0.0878, -0.5686,
          0.8226]])
    assert torch.allclose(expected, targets, atol=1e-4)

    # decode: targets + the same points/labels should recover the boxes
    decoded = coder.decode(targets, pts, labels)
    assert torch.allclose(decoded, boxes, atol=1e-4)
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment