Unverified Commit d7067e44 authored by Wenwei Zhang, committed by GitHub

Bump version to v1.1.0rc2

Bump to v1.1.0rc2
parents 28fe73d2 fb0e57e5
# Copyright (c) OpenMMLab. All rights reserved.
from mmdet.models.losses import FocalLoss, SmoothL1Loss, binary_cross_entropy

from .axis_aligned_iou_loss import AxisAlignedIoULoss, axis_aligned_iou_loss
from .chamfer_distance import ChamferDistance, chamfer_distance
from .multibin_loss import MultiBinLoss
from .paconv_regularization_loss import PAConvRegularizationLoss
from .rotated_iou_loss import RotatedIoU3DLoss, rotated_iou_3d_loss
from .uncertain_smooth_l1_loss import UncertainL1Loss, UncertainSmoothL1Loss

__all__ = [
'FocalLoss', 'SmoothL1Loss', 'binary_cross_entropy', 'ChamferDistance',
'chamfer_distance', 'axis_aligned_iou_loss', 'AxisAlignedIoULoss',
'PAConvRegularizationLoss', 'UncertainL1Loss', 'UncertainSmoothL1Loss',
'MultiBinLoss', 'RotatedIoU3DLoss', 'rotated_iou_3d_loss'
]
# Copyright (c) OpenMMLab. All rights reserved.
import torch
from mmdet.models.losses.utils import weighted_loss
from torch import nn as nn

from mmdet3d.registry import MODELS
from mmdet3d.structures import AxisAlignedBboxOverlaps3D


@weighted_loss
......
# Copyright (c) OpenMMLab. All rights reserved.
import torch
from mmdet.models.losses.utils import weighted_loss
from torch import nn as nn
from torch.nn import functional as F

from mmdet3d.registry import MODELS


@weighted_loss
......
# Copyright (c) OpenMMLab. All rights reserved.
import torch
from mmdet.models.losses.utils import weight_reduce_loss
from torch import nn as nn

from mmdet3d.registry import MODELS

from ..layers import PAConv, PAConvCUDA
......
# Copyright (c) OpenMMLab. All rights reserved.
from typing import Optional
import torch
from mmcv.ops import diff_iou_rotated_3d
from mmdet.models.losses.utils import weighted_loss
from torch import Tensor
from torch import nn as nn
from mmdet3d.registry import MODELS
@weighted_loss
def rotated_iou_3d_loss(pred, target: Tensor) -> Tensor:
"""Calculate the IoU loss (1-IoU) of two sets of rotated bounding boxes.
Note that predictions and targets correspond one-to-one.
Args:
pred (torch.Tensor): Bbox predictions with shape [N, 7]
(x, y, z, w, l, h, alpha).
target (torch.Tensor): Bbox targets (gt) with shape [N, 7]
(x, y, z, w, l, h, alpha).
Returns:
torch.Tensor: IoU loss between predictions and targets.
"""
iou_loss = 1 - diff_iou_rotated_3d(pred.unsqueeze(0),
target.unsqueeze(0))[0]
return iou_loss
@MODELS.register_module()
class RotatedIoU3DLoss(nn.Module):
"""Calculate the IoU loss (1-IoU) of rotated bounding boxes.
Args:
reduction (str): Method to reduce losses. The valid reduction
methods are 'none', 'sum' and 'mean'. Defaults to 'mean'.
loss_weight (float, optional): Weight of loss. Defaults to 1.0.
"""
def __init__(self,
reduction: str = 'mean',
loss_weight: Optional[float] = 1.0):
super().__init__()
self.reduction = reduction
self.loss_weight = loss_weight
def forward(self,
pred: Tensor,
target: Tensor,
weight: Optional[Tensor] = None,
avg_factor: Optional[int] = None,
reduction_override: Optional[str] = None,
**kwargs) -> Tensor:
"""Forward function of loss calculation.
Args:
pred (torch.Tensor): Bbox predictions with shape [..., 7]
(x, y, z, w, l, h, alpha).
target (torch.Tensor): Bbox targets (gt) with shape [..., 7]
(x, y, z, w, l, h, alpha).
weight (torch.Tensor | float, optional): Weight of loss.
Defaults to None.
avg_factor (int, optional): Average factor that is used to average
the loss. Defaults to None.
reduction_override (str, optional): Method to reduce losses.
The valid reduction methods are 'none', 'sum' and 'mean'.
Defaults to None.
Returns:
torch.Tensor: IoU loss between predictions and targets.
"""
if weight is not None and not torch.any(weight > 0):
return pred.sum() * weight.sum() # 0
assert reduction_override in (None, 'none', 'mean', 'sum')
reduction = (
reduction_override if reduction_override else self.reduction)
if weight is not None and weight.dim() > 1:
weight = weight.mean(-1)
loss = self.loss_weight * rotated_iou_3d_loss(
pred,
target,
weight,
reduction=reduction,
avg_factor=avg_factor,
**kwargs)
return loss
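A minimal usage sketch of the new loss (illustrative tensors; diff_iou_rotated_3d is a CUDA op in mmcv, so the inputs are placed on a GPU):

import torch

from mmdet3d.models.losses import RotatedIoU3DLoss

loss_fn = RotatedIoU3DLoss(reduction='mean', loss_weight=1.0)
# Two one-to-one matched sets of boxes in (x, y, z, w, l, h, alpha) format.
pred = torch.rand(4, 7, device='cuda')
target = torch.rand(4, 7, device='cuda')
loss = loss_fn(pred, target)  # scalar tensor: mean of (1 - IoU) over the 4 pairs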
# Copyright (c) OpenMMLab. All rights reserved.
import torch
from mmdet.models.losses.utils import weighted_loss
from torch import nn as nn

from mmdet3d.registry import MODELS


@weighted_loss
......
@@ -2,7 +2,9 @@
from .pillar_scatter import PointPillarsScatter
from .sparse_encoder import SparseEncoder, SparseEncoderSASSD
from .sparse_unet import SparseUNet
from .voxel_set_abstraction import VoxelSetAbstraction

__all__ = [
'PointPillarsScatter', 'SparseEncoder', 'SparseEncoderSASSD', 'SparseUNet',
'VoxelSetAbstraction'
]
# Copyright (c) OpenMMLab. All rights reserved.
from typing import List, Tuple

import torch
from mmcv.ops import points_in_boxes_all, three_interpolate, three_nn
from mmdet.models.losses import sigmoid_focal_loss, smooth_l1_loss
from torch import Tensor
from torch import nn as nn

from mmdet3d.models.layers import SparseBasicBlock, make_sparse_convmodule
from mmdet3d.models.layers.spconv import IS_SPCONV2_AVAILABLE
from mmdet3d.registry import MODELS
from mmdet3d.structures import BaseInstance3DBoxes

if IS_SPCONV2_AVAILABLE:
from spconv.pytorch import SparseConvTensor, SparseSequential
@@ -37,6 +41,8 @@ class SparseEncoder(nn.Module):
Defaults to ((1, ), (1, 1, 1), (1, 1, 1), ((0, 1, 1), 1, 1)).
block_type (str, optional): Type of the block to use.
Defaults to 'conv_module'.
return_middle_feats (bool): Whether to output middle features.
Defaults to False.
"""
def __init__(self,
@@ -50,7 +56,8 @@ class SparseEncoder(nn.Module):
64)),
encoder_paddings=((1, ), (1, 1, 1), (1, 1, 1), ((0, 1, 1), 1,
1)),
block_type='conv_module',
return_middle_feats=False):
super().__init__()
assert block_type in ['conv_module', 'basicblock']
self.sparse_shape = sparse_shape
@@ -62,6 +69,7 @@ class SparseEncoder(nn.Module):
self.encoder_paddings = encoder_paddings
self.stage_num = len(self.encoder_channels)
self.fp16_enabled = False
self.return_middle_feats = return_middle_feats
# Spconv init all weight on its own
assert isinstance(order, tuple) and len(order) == 3
@@ -113,7 +121,14 @@ class SparseEncoder(nn.Module):
batch_size (int): Batch size.
Returns:
torch.Tensor | tuple[torch.Tensor, list]: Spatial features, and
optionally the middle-layer features:
- spatial_features (torch.Tensor): Spatial features output by
the last layer.
- encode_features (List[SparseConvTensor], optional): Middle-layer
output features, returned only when self.return_middle_feats
is True.
"""
coors = coors.int()
input_sp_tensor = SparseConvTensor(voxel_features, coors,
@@ -133,7 +148,10 @@ class SparseEncoder(nn.Module):
N, C, D, H, W = spatial_features.shape
spatial_features = spatial_features.view(N, C * D, H, W)
if self.return_middle_feats:
return spatial_features, encode_features
else:
return spatial_features
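A hedged construction sketch for the new flag (hypothetical channel and grid values; a sparse-conv backend such as spconv or the mmcv sparse ops is assumed):

from mmdet3d.models.middle_encoders import SparseEncoder

encoder = SparseEncoder(
    in_channels=4,
    sparse_shape=[41, 1600, 1408],
    return_middle_feats=True)
# With return_middle_feats=True the forward pass returns a tuple instead of
# a single tensor:
# spatial_features, encode_features = encoder(voxel_features, coors, batch_size)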
def make_encoder_layers(self,
make_block,
@@ -238,17 +256,17 @@ class SparseEncoderSASSD(SparseEncoder):
"""
def __init__(self,
in_channels: int,
sparse_shape: List[int],
order: Tuple[str] = ('conv', 'norm', 'act'),
norm_cfg: dict = dict(type='BN1d', eps=1e-3, momentum=0.01),
base_channels: int = 16,
output_channels: int = 128,
encoder_channels: Tuple[tuple] = ((16, ), (32, 32, 32),
(64, 64, 64), (64, 64, 64)),
encoder_paddings: Tuple[tuple] = ((1, ), (1, 1, 1), (1, 1, 1),
((0, 1, 1), 1, 1)),
block_type: str = 'conv_module'):
super(SparseEncoderSASSD, self).__init__(
in_channels=in_channels,
sparse_shape=sparse_shape,
@@ -264,7 +282,11 @@ class SparseEncoderSASSD(SparseEncoder):
self.point_cls = nn.Linear(64, 1, bias=False)
self.point_reg = nn.Linear(64, 3, bias=False)
def forward(self,
voxel_features: Tensor,
coors: Tensor,
batch_size: Tensor,
test_mode: bool = False) -> Tuple[Tensor, tuple]:
"""Forward of SparseEncoder.
Args:
@@ -276,7 +298,7 @@ class SparseEncoderSASSD(SparseEncoder):
Defaults to False.
Returns:
Tensor: Backbone features.
tuple[torch.Tensor]: Mean feature value of the points,
Classification result of the points,
Regression offsets of the points.
@@ -333,14 +355,17 @@ class SparseEncoderSASSD(SparseEncoder):
return spatial_features, point_misc
def get_auxiliary_targets(self,
points_feats: Tensor,
gt_bboxes_3d: List[BaseInstance3DBoxes],
enlarge: float = 1.0) -> Tuple[Tensor, Tensor]:
"""Get auxiliary target.
Args:
points_feats (torch.Tensor): Mean features of the points.
gt_bboxes_3d (list[:obj:`BaseInstance3DBoxes`]): Ground truth
boxes for each sample.
enlarge (float, optional): Enlarged scale. Defaults to 1.0.
Returns:
tuple[torch.Tensor]: Label of the points and
@@ -348,31 +373,32 @@ class SparseEncoderSASSD(SparseEncoder):
"""
center_offsets = list()
pts_labels = list()
for i in range(len(gt_bboxes_3d)):
boxes3d = gt_bboxes_3d[i].tensor.detach().clone()
idx = torch.nonzero(points_feats[:, 0] == i).view(-1)
point_xyz = points_feats[idx, 1:].detach().clone()
boxes3d[:, 3:6] *= enlarge
pts_in_flag, center_offset = self.calculate_pts_offsets(
point_xyz, boxes3d)
pts_label = pts_in_flag.max(0)[0].byte()
pts_labels.append(pts_label)
center_offsets.append(center_offset)
center_offsets = torch.cat(center_offsets)
pts_labels = torch.cat(pts_labels).to(center_offsets.device)
return pts_labels, center_offsets
def calculate_pts_offsets(self, points: Tensor,
bboxes_3d: Tensor) -> Tuple[Tensor, Tensor]:
"""Find all boxes in which each point is, as well as the offsets from
the box centers.
Args:
points (torch.Tensor): [M, 3], [x, y, z] in LiDAR coordinate.
bboxes_3d (torch.Tensor): [T, 7],
num_valid_boxes <= T, [x, y, z, x_size, y_size, z_size, rz],
(x, y, z) is the bottom center.
@@ -383,44 +409,41 @@ class SparseEncoderSASSD(SparseEncoder):
if it belongs to the box, with the shape of (M, 3).
Default background = 0.
"""
boxes_num = len(bboxes_3d)
pts_num = len(points)
box_indices = points_in_boxes_all(points[None, ...], bboxes_3d[None,
...])
pts_indices = box_indices.squeeze(0).transpose(0, 1)
center_offsets = torch.zeros_like(points).to(points.device)
for i in range(boxes_num):
for j in range(pts_num):
if pts_indices[i][j] == 1:
center_offsets[j][0] = points[j][0] - bboxes_3d[i][0]
center_offsets[j][1] = points[j][1] - bboxes_3d[i][1]
center_offsets[j][2] = (
points[j][2] -
(bboxes_3d[i][2] + bboxes_3d[i][2] / 2.0))
return pts_indices, center_offsets
def aux_loss(self, points: Tensor, point_cls: Tensor, point_reg: Tensor,
gt_bboxes_3d: Tensor) -> dict:
"""Calculate auxiliary loss.
Args:
points (torch.Tensor): Mean feature value of the points.
point_cls (torch.Tensor): Classification result of the points.
point_reg (torch.Tensor): Regression offsets of the points.
gt_bboxes_3d (list[:obj:`BaseInstance3DBoxes`]): Ground truth
boxes for each sample.
Returns:
dict: Auxiliary loss.
"""
num_boxes = len(gt_bboxes_3d)
pts_labels, center_targets = self.get_auxiliary_targets(
points, gt_bboxes_3d)
rpn_cls_target = pts_labels.long()
pos = (pts_labels > 0).float()
@@ -449,11 +472,13 @@ class SparseEncoderSASSD(SparseEncoder):
return dict(aux_loss_cls=aux_loss_cls, aux_loss_reg=aux_loss_reg)
def make_auxiliary_points(
self,
source_tensor: Tensor,
target: Tensor,
offset: Tuple = (0., -40., -3.),
voxel_size: Tuple = (.05, .05, .1)
) -> Tensor:
"""Make auxiliary points for loss computation.
Args:
......
# Copyright (c) OpenMMLab. All rights reserved.
from typing import List, Optional
import mmengine
import torch
import torch.nn as nn
from mmcv.cnn import ConvModule
from mmcv.ops.furthest_point_sample import furthest_point_sample
from mmengine.model import BaseModule
from mmdet3d.registry import MODELS
from mmdet3d.utils import InstanceList
def bilinear_interpolate_torch(inputs, x, y):
"""Bilinear interpolate for inputs."""
x0 = torch.floor(x).long()
x1 = x0 + 1
y0 = torch.floor(y).long()
y1 = y0 + 1
x0 = torch.clamp(x0, 0, inputs.shape[1] - 1)
x1 = torch.clamp(x1, 0, inputs.shape[1] - 1)
y0 = torch.clamp(y0, 0, inputs.shape[0] - 1)
y1 = torch.clamp(y1, 0, inputs.shape[0] - 1)
Ia = inputs[y0, x0]
Ib = inputs[y1, x0]
Ic = inputs[y0, x1]
Id = inputs[y1, x1]
wa = (x1.type_as(x) - x) * (y1.type_as(y) - y)
wb = (x1.type_as(x) - x) * (y - y0.type_as(y))
wc = (x - x0.type_as(x)) * (y1.type_as(y) - y)
wd = (x - x0.type_as(x)) * (y - y0.type_as(y))
ans = torch.t((torch.t(Ia) * wa)) + torch.t(torch.t(Ib) * wb) + torch.t(
torch.t(Ic) * wc) + torch.t(torch.t(Id) * wd)
return ans
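As a quick sanity check of the helper defined above (arbitrary values): a (H, W, C) feature map sampled at two fractional (x, y) locations yields a (2, C) tensor.

import torch

feats = torch.arange(4 * 5 * 3, dtype=torch.float32).view(4, 5, 3)  # (H, W, C)
xs = torch.tensor([1.5, 3.2])  # fractional column (x) indices
ys = torch.tensor([0.7, 2.9])  # fractional row (y) indices
sampled = bilinear_interpolate_torch(feats, xs, ys)
assert sampled.shape == (2, 3)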
@MODELS.register_module()
class VoxelSetAbstraction(BaseModule):
"""Voxel set abstraction module for PVRCNN and PVRCNN++.
Args:
num_keypoints (int): The number of key points sampled from the
raw point cloud.
fused_out_channel (int): Number of output channels of the fused
key point features. Defaults to 128.
voxel_size (list[float]): Size of voxels. Defaults to
[0.05, 0.05, 0.1].
point_cloud_range (list[float]): Point cloud range. Defaults to
[0, -40, -3, 70.4, 40, 1].
voxel_sa_cfgs_list (List[dict or ConfigDict], optional): List of SA
module cfgs. Used to gather key point features from multi-scale
voxel features. Defaults to None.
rawpoints_sa_cfgs (dict or ConfigDict, optional): SA module cfg.
Used to gather key point features from raw points. Defaults to
None.
bev_feat_channel (int): Number of BEV feature channels.
Defaults to 256.
bev_scale_factor (int): BEV feature scale factor. Defaults to 8.
voxel_center_as_source (bool): Whether to use voxel centers as
the source of key points. Defaults to False.
norm_cfg (dict[str]): Config of normalization layer. Defaults to
dict(type='BN2d', eps=1e-5, momentum=0.1).
bias (bool | str, optional): If specified as `auto`, it will be
decided by `norm_cfg`. `bias` will be set as True if
"""
def __init__(self,
num_keypoints: int,
fused_out_channel: int = 128,
voxel_size: list = [0.05, 0.05, 0.1],
point_cloud_range: list = [0, -40, -3, 70.4, 40, 1],
voxel_sa_cfgs_list: Optional[list] = None,
rawpoints_sa_cfgs: Optional[dict] = None,
bev_feat_channel: int = 256,
bev_scale_factor: int = 8,
voxel_center_as_source: bool = False,
norm_cfg: dict = dict(type='BN2d', eps=1e-5, momentum=0.1),
bias: str = 'auto') -> None:
super().__init__()
self.num_keypoints = num_keypoints
self.fused_out_channel = fused_out_channel
self.voxel_size = voxel_size
self.point_cloud_range = point_cloud_range
self.voxel_center_as_source = voxel_center_as_source
gathered_channel = 0
if rawpoints_sa_cfgs is not None:
self.rawpoints_sa_layer = MODELS.build(rawpoints_sa_cfgs)
gathered_channel += sum(
[x[-1] for x in rawpoints_sa_cfgs.mlp_channels])
else:
self.rawpoints_sa_layer = None
if voxel_sa_cfgs_list is not None:
self.voxel_sa_configs_list = voxel_sa_cfgs_list
self.voxel_sa_layers = nn.ModuleList()
for voxel_sa_config in voxel_sa_cfgs_list:
cur_layer = MODELS.build(voxel_sa_config)
self.voxel_sa_layers.append(cur_layer)
gathered_channel += sum(
[x[-1] for x in voxel_sa_config.mlp_channels])
else:
self.voxel_sa_layers = None
if bev_feat_channel is not None and bev_scale_factor is not None:
self.bev_cfg = mmengine.Config(
dict(
bev_feat_channels=bev_feat_channel,
bev_scale_factor=bev_scale_factor))
gathered_channel += bev_feat_channel
else:
self.bev_cfg = None
self.point_feature_fusion_layer = nn.Sequential(
ConvModule(
gathered_channel,
fused_out_channel,
kernel_size=(1, 1),
stride=(1, 1),
conv_cfg=dict(type='Conv2d'),
norm_cfg=norm_cfg,
bias=bias))
def interpolate_from_bev_features(self, keypoints: torch.Tensor,
bev_features: torch.Tensor,
batch_size: int,
bev_scale_factor: int) -> torch.Tensor:
"""Gather key points features from bev feature map by interpolate.
Args:
keypoints (torch.Tensor): Sampled key points with shape
(N1 + N2 + ..., NDim).
bev_features (torch.Tensor): Bev feature map from the first
stage with shape (B, C, H, W).
batch_size (int): Input batch size.
bev_scale_factor (int): Bev feature map scale factor.
Returns:
torch.Tensor: Key point features gathered from the BEV feature
map, with shape (N1 + N2 + ..., C).
"""
x_idxs = (keypoints[..., 0] -
self.point_cloud_range[0]) / self.voxel_size[0]
y_idxs = (keypoints[..., 1] -
self.point_cloud_range[1]) / self.voxel_size[1]
x_idxs = x_idxs / bev_scale_factor
y_idxs = y_idxs / bev_scale_factor
point_bev_features_list = []
for k in range(batch_size):
cur_x_idxs = x_idxs[k, ...]
cur_y_idxs = y_idxs[k, ...]
cur_bev_features = bev_features[k].permute(1, 2, 0) # (H, W, C)
point_bev_features = bilinear_interpolate_torch(
cur_bev_features, cur_x_idxs, cur_y_idxs)
point_bev_features_list.append(point_bev_features)
point_bev_features = torch.cat(
point_bev_features_list, dim=0) # (N1 + N2 + ..., C)
return point_bev_features.view(batch_size, keypoints.shape[1], -1)
def get_voxel_centers(self, coors: torch.Tensor,
scale_factor: float) -> torch.Tensor:
"""Get voxel centers coordinate.
Args:
coors (torch.Tensor): Coordinates of voxels shape is Nx(1+NDim),
where 1 represents the batch index.
scale_factor (float): Scale factor.
Returns:
torch.Tensor: Voxel centers coordinate with shape (N, 3).
"""
assert coors.shape[1] == 4
voxel_centers = coors[:, [3, 2, 1]].float() # (xyz)
voxel_size = torch.tensor(
self.voxel_size,
device=voxel_centers.device).float() * scale_factor
pc_range = torch.tensor(
self.point_cloud_range[0:3], device=voxel_centers.device).float()
voxel_centers = (voxel_centers + 0.5) * voxel_size + pc_range
return voxel_centers
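A CPU-runnable sketch of the voxel-center recovery above (hypothetical voxel indices; only the BEV branch is configured so that no SA modules need to be built):

import torch

from mmdet3d.models.middle_encoders import VoxelSetAbstraction

vsa = VoxelSetAbstraction(
    num_keypoints=2048,
    voxel_sa_cfgs_list=None,
    rawpoints_sa_cfgs=None,
    bev_feat_channel=256,
    bev_scale_factor=8)
# coors are (batch_idx, z_idx, y_idx, x_idx) voxel indices.
coors = torch.tensor([[0, 5, 100, 200], [0, 6, 101, 201]])
centers = vsa.get_voxel_centers(coors, scale_factor=1.0)  # (2, 3) xyz in meters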
def sample_key_points(self, points: List[torch.Tensor],
coors: torch.Tensor) -> torch.Tensor:
"""Sample key points from raw points cloud.
Args:
points (List[torch.Tensor]): Point cloud of each sample.
coors (torch.Tensor): Coordinates of voxels shape is Nx(1+NDim),
where 1 represents the batch index.
Returns:
torch.Tensor: (B, M, 3) Key points of each sample.
M is num_keypoints.
"""
assert points is not None or coors is not None
if self.voxel_center_as_source:
_src_points = self.get_voxel_centers(coors=coors, scale_factor=1)
batch_size = coors[-1, 0].item() + 1
src_points = [
_src_points[coors[:, 0] == b] for b in range(batch_size)
]
else:
src_points = [p[..., :3] for p in points]
keypoints_list = []
for points_to_sample in src_points:
num_points = points_to_sample.shape[0]
cur_pt_idxs = furthest_point_sample(
points_to_sample.unsqueeze(dim=0).contiguous(),
self.num_keypoints).long()[0]
if num_points < self.num_keypoints:
times = int(self.num_keypoints / num_points) + 1
non_empty = cur_pt_idxs[:num_points]
cur_pt_idxs = non_empty.repeat(times)[:self.num_keypoints]
keypoints = points_to_sample[cur_pt_idxs]
keypoints_list.append(keypoints)
keypoints = torch.stack(keypoints_list, dim=0) # (B, M, 3)
return keypoints
def forward(self, batch_inputs_dict: dict, feats_dict: dict,
rpn_results_list: InstanceList) -> dict:
"""Extract point-wise features from multi-input.
Args:
batch_inputs_dict (dict): The model input dict which include
'points', 'voxels' keys.
- points (list[torch.Tensor]): Point cloud of each sample.
- voxels (dict[torch.Tensor]): Voxels of the batch sample.
feats_dict (dict): Contains features from the first
stage.
rpn_results_list (List[:obj:`InstanceData`]): Detection results
of rpn head.
Returns:
dict: Contains point-wise features, including:
- keypoints (torch.Tensor): Sampled key points.
- keypoint_features (torch.Tensor): Key point features gathered
from the multiple inputs.
- fusion_keypoint_features (torch.Tensor): keypoint_features
fused by point_feature_fusion_layer.
"""
points = batch_inputs_dict['points']
voxel_encode_features = feats_dict['multi_scale_3d_feats']
bev_encode_features = feats_dict['spatial_feats']
if self.voxel_center_as_source:
voxels_coors = batch_inputs_dict['voxels']['coors']
else:
voxels_coors = None
keypoints = self.sample_key_points(points, voxels_coors)
point_features_list = []
batch_size = len(points)
if self.bev_cfg is not None:
point_bev_features = self.interpolate_from_bev_features(
keypoints, bev_encode_features, batch_size,
self.bev_cfg.bev_scale_factor)
point_features_list.append(point_bev_features.contiguous())
batch_size, num_keypoints, _ = keypoints.shape
key_xyz = keypoints.view(-1, 3)
key_xyz_batch_cnt = key_xyz.new_zeros(batch_size).int().fill_(
num_keypoints)
if self.rawpoints_sa_layer is not None:
batch_points = torch.cat(points, dim=0)
batch_cnt = [len(p) for p in points]
xyz = batch_points[:, :3].contiguous()
features = None
if batch_points.size(1) > 0:
features = batch_points[:, 3:].contiguous()
xyz_batch_cnt = xyz.new_tensor(batch_cnt, dtype=torch.int32)
pooled_points, pooled_features = self.rawpoints_sa_layer(
xyz=xyz.contiguous(),
xyz_batch_cnt=xyz_batch_cnt,
new_xyz=key_xyz.contiguous(),
new_xyz_batch_cnt=key_xyz_batch_cnt,
features=features.contiguous(),
)
point_features_list.append(pooled_features.contiguous().view(
batch_size, num_keypoints, -1))
if self.voxel_sa_layers is not None:
for k, voxel_sa_layer in enumerate(self.voxel_sa_layers):
cur_coords = voxel_encode_features[k].indices
xyz = self.get_voxel_centers(
coors=cur_coords,
scale_factor=self.voxel_sa_configs_list[k].scale_factor
).contiguous()
xyz_batch_cnt = xyz.new_zeros(batch_size).int()
for bs_idx in range(batch_size):
xyz_batch_cnt[bs_idx] = (cur_coords[:, 0] == bs_idx).sum()
pooled_points, pooled_features = voxel_sa_layer(
xyz=xyz.contiguous(),
xyz_batch_cnt=xyz_batch_cnt,
new_xyz=key_xyz.contiguous(),
new_xyz_batch_cnt=key_xyz_batch_cnt,
features=voxel_encode_features[k].features.contiguous(),
)
point_features_list.append(pooled_features.contiguous().view(
batch_size, num_keypoints, -1))
point_features = torch.cat(
point_features_list, dim=-1).view(batch_size * num_keypoints, -1,
1)
fusion_point_features = self.point_feature_fusion_layer(
point_features.unsqueeze(dim=-1)).squeeze(dim=-1)
batch_idxs = torch.arange(
batch_size * num_keypoints, device=keypoints.device
) // num_keypoints # batch indexes of each key points
batch_keypoints_xyz = torch.cat(
(batch_idxs.to(key_xyz.dtype).unsqueeze(dim=-1), key_xyz), dim=-1)
return dict(
keypoint_features=point_features.squeeze(dim=-1),
fusion_keypoint_features=fusion_point_features.squeeze(dim=-1),
keypoints=batch_keypoints_xyz)
# Copyright (c) OpenMMLab. All rights reserved.
from mmdet.models.necks.fpn import FPN

from .dla_neck import DLANeck
from .imvoxel_neck import OutdoorImVoxelNeck
from .pointnet2_fp_neck import PointNetFPNeck
......
@@ -5,10 +5,11 @@ from .h3d_roi_head import H3DRoIHead
from .mask_heads import PointwiseSemanticHead, PrimitiveHead
from .part_aggregation_roi_head import PartAggregationROIHead
from .point_rcnn_roi_head import PointRCNNRoIHead
from .pv_rcnn_roi_head import PVRCNNRoiHead
from .roi_extractors import Single3DRoIAwareExtractor, SingleRoIExtractor

__all__ = [
'Base3DRoIHead', 'PartAggregationROIHead', 'PointwiseSemanticHead',
'Single3DRoIAwareExtractor', 'PartA2BboxHead', 'SingleRoIExtractor',
'H3DRoIHead', 'PrimitiveHead', 'PointRCNNRoIHead', 'PVRCNNRoiHead'
]
# Copyright (c) OpenMMLab. All rights reserved.
from mmdet.models.roi_heads import BaseRoIHead

from mmdet3d.registry import MODELS, TASK_UTILS


class Base3DRoIHead(BaseRoIHead):
"""Base class for 3d RoIHeads."""
......
@@ -3,12 +3,14 @@ from mmdet.models.roi_heads.bbox_heads import (BBoxHead, ConvFCBBoxHead,
DoubleConvFCBBoxHead,
Shared2FCBBoxHead,
Shared4Conv1FCBBoxHead)

from .h3d_bbox_head import H3DBboxHead
from .parta2_bbox_head import PartA2BboxHead
from .point_rcnn_bbox_head import PointRCNNBboxHead
from .pv_rcnn_bbox_head import PVRCNNBBoxHead

__all__ = [
'BBoxHead', 'ConvFCBBoxHead', 'Shared2FCBBoxHead',
'Shared4Conv1FCBBoxHead', 'DoubleConvFCBBoxHead', 'PartA2BboxHead',
'H3DBboxHead', 'PointRCNNBboxHead', 'PVRCNNBBoxHead'
]
@@ -3,6 +3,7 @@ from typing import Dict, List, Optional, Tuple
import torch
from mmcv.cnn import ConvModule
from mmdet.models.utils import multi_apply
from mmengine.model import BaseModule
from mmengine.structures import InstanceData
from torch import Tensor
@@ -15,7 +16,6 @@ from mmdet3d.models.losses import chamfer_distance
from mmdet3d.registry import MODELS, TASK_UTILS
from mmdet3d.structures import (BaseInstance3DBoxes, DepthInstance3DBoxes,
Det3DDataSample)


@MODELS.register_module()
......
@@ -4,6 +4,7 @@ from typing import Dict, List, Tuple
import numpy as np
import torch
from mmcv.cnn import ConvModule
from mmdet.models.utils import multi_apply
from mmengine.model import normal_init
from mmengine.structures import InstanceData
from torch import Tensor
@@ -11,7 +12,6 @@ from torch import Tensor
from mmdet3d.models import make_sparse_convmodule
from mmdet3d.models.layers.spconv import IS_SPCONV2_AVAILABLE
from mmdet3d.utils.typing import InstanceList

if IS_SPCONV2_AVAILABLE:
from spconv.pytorch import (SparseConvTensor, SparseMaxPool3d,
......
@@ -6,6 +6,7 @@ import torch
import torch.nn as nn
from mmcv.cnn import ConvModule
from mmcv.cnn.bricks import build_conv_layer
from mmdet.models.utils import multi_apply
from mmengine.model import BaseModule, normal_init
from mmengine.structures import InstanceData
from torch import Tensor
@@ -16,7 +17,6 @@ from mmdet3d.registry import MODELS, TASK_UTILS
from mmdet3d.structures.bbox_3d import (LiDARInstance3DBoxes,
rotation_3d_in_axis, xywhr2xyxyr)
from mmdet3d.utils.typing import InstanceList, SamplingResultList


@MODELS.register_module()
......
# Copyright (c) OpenMMLab. All rights reserved.
from typing import Dict, List, Optional, Tuple
import numpy as np
import torch
from mmcv.cnn import ConvModule
from mmdet.models.task_modules.samplers import SamplingResult
from mmdet.models.utils import multi_apply
from mmengine.model import BaseModule
from mmengine.structures import InstanceData
from torch import nn as nn
from mmdet3d.models.builder import build_loss
from mmdet3d.models.layers import nms_bev, nms_normal_bev
from mmdet3d.registry import MODELS, TASK_UTILS
from mmdet3d.structures.bbox_3d import (LiDARInstance3DBoxes,
rotation_3d_in_axis, xywhr2xyxyr)
from mmdet3d.utils import InstanceList
@MODELS.register_module()
class PVRCNNBBoxHead(BaseModule):
"""PVRCNN BBox head.
Args:
in_channels (int): The number of input channel.
grid_size (int): The number of grid points in roi bbox.
num_classes (int): The number of classes.
class_agnostic (bool): Whether to generate class-agnostic predictions.
Defaults to True.
shared_fc_channels (tuple(int)): Out channels of each shared fc layer.
Defaults to (256, 256).
cls_channels (tuple(int)): Out channels of each classification layer.
Defaults to (256, 256).
reg_channels (tuple(int)): Out channels of each regression layer.
Defaults to (256, 256).
dropout_ratio (float): Ratio of dropout layer. Defaults to 0.3.
with_corner_loss (bool): Whether to use corner loss or not.
Defaults to True.
bbox_coder (:obj:`BaseBBoxCoder`): Bbox coder for box head.
Defaults to dict(type='DeltaXYZWLHRBBoxCoder').
norm_cfg (dict): Type of normalization method.
Defaults to dict(type='BN1d', eps=1e-5, momentum=0.1)
loss_bbox (dict): Config dict of box regression loss.
loss_cls (dict): Config dict of classification loss.
init_cfg (dict, optional): Initialization config of the
model.
"""
def __init__(
self,
in_channels: int,
grid_size: int,
num_classes: int,
class_agnostic: bool = True,
shared_fc_channels: Tuple[int] = (256, 256),
cls_channels: Tuple[int] = (256, 256),
reg_channels: Tuple[int] = (256, 256),
dropout_ratio: float = 0.3,
with_corner_loss: bool = True,
bbox_coder: dict = dict(type='DeltaXYZWLHRBBoxCoder'),
norm_cfg: dict = dict(type='BN2d', eps=1e-5, momentum=0.1),
loss_bbox: dict = dict(
type='mmdet.SmoothL1Loss', beta=1.0 / 9.0, loss_weight=2.0),
loss_cls: dict = dict(
type='mmdet.CrossEntropyLoss',
use_sigmoid=True,
reduction='none',
loss_weight=1.0),
init_cfg: Optional[dict] = dict(
type='Xavier', layer=['Conv2d', 'Conv1d'], distribution='uniform')
) -> None:
super(PVRCNNBBoxHead, self).__init__(init_cfg=init_cfg)
self.init_cfg = init_cfg
self.num_classes = num_classes
self.with_corner_loss = with_corner_loss
self.class_agnostic = class_agnostic
self.bbox_coder = TASK_UTILS.build(bbox_coder)
self.loss_bbox = build_loss(loss_bbox)
self.loss_cls = build_loss(loss_cls)
self.use_sigmoid_cls = loss_cls.get('use_sigmoid', False)
cls_out_channels = 1 if class_agnostic else num_classes
self.reg_out_channels = self.bbox_coder.code_size * cls_out_channels
if self.use_sigmoid_cls:
self.cls_out_channels = cls_out_channels
else:
self.cls_out_channels = cls_out_channels + 1
self.dropout_ratio = dropout_ratio
self.grid_size = grid_size
# PVRCNNBBoxHead model in_channels is num of grid points in roi box.
in_channels *= (self.grid_size**3)
self.in_channels = in_channels
self.shared_fc_layer = self._make_fc_layers(
in_channels, shared_fc_channels,
range(len(shared_fc_channels) - 1), norm_cfg)
self.cls_layer = self._make_fc_layers(
shared_fc_channels[-1],
cls_channels,
range(1),
norm_cfg,
out_channels=self.cls_out_channels)
self.reg_layer = self._make_fc_layers(
shared_fc_channels[-1],
reg_channels,
range(1),
norm_cfg,
out_channels=self.reg_out_channels)
def _make_fc_layers(self,
in_channels: int,
fc_channels: list,
dropout_indices: list,
norm_cfg: dict,
out_channels: Optional[int] = None) -> torch.nn.Module:
"""Initial a full connection layer.
Args:
in_channels (int): Module in channels.
fc_channels (list): Full connection layer channels.
dropout_indices (list): Dropout indices.
norm_cfg (dict): Type of normalization method.
out_channels (int, optional): Module out channels.
"""
fc_layers = []
pre_channel = in_channels
for k in range(len(fc_channels)):
fc_layers.append(
ConvModule(
pre_channel,
fc_channels[k],
kernel_size=(1, 1),
stride=(1, 1),
norm_cfg=norm_cfg,
conv_cfg=dict(type='Conv2d'),
bias=False,
inplace=True))
pre_channel = fc_channels[k]
if self.dropout_ratio >= 0 and k in dropout_indices:
fc_layers.append(nn.Dropout(self.dropout_ratio))
if out_channels is not None:
fc_layers.append(
nn.Conv2d(fc_channels[-1], out_channels, 1, bias=True))
fc_layers = nn.Sequential(*fc_layers)
return fc_layers
def forward(self, feats: torch.Tensor) -> Tuple[torch.Tensor]:
"""Forward pvrcnn bbox head.
Args:
feats (torch.Tensor): Batch point-wise features.
Returns:
tuple[torch.Tensor]: Score of class and bbox predictions.
"""
# (B * N, 6, 6, 6, C)
rcnn_batch_size = feats.shape[0]
feats = feats.permute(0, 4, 1, 2,
3).contiguous().view(rcnn_batch_size, -1, 1, 1)
# (BxN, C*6*6*6)
shared_feats = self.shared_fc_layer(feats)
cls_score = self.cls_layer(shared_feats).transpose(
1, 2).contiguous().view(-1, self.cls_out_channels) # (B, 1)
bbox_pred = self.reg_layer(shared_feats).transpose(
1, 2).contiguous().view(-1, self.reg_out_channels) # (B, C)
return cls_score, bbox_pred
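An illustrative forward pass (hypothetical channel counts; assumes the default DeltaXYZWLHRBBoxCoder and the mmdet losses are available in the registry):

import torch

from mmdet3d.models.roi_heads.bbox_heads import PVRCNNBBoxHead

head = PVRCNNBBoxHead(in_channels=128, grid_size=6, num_classes=3)
# Grid-pooled RoI features with shape (num_rois, grid, grid, grid, C).
feats = torch.rand(2, 6, 6, 6, 128)
cls_score, bbox_pred = head(feats)
assert cls_score.shape == (2, 1)  # class-agnostic objectness score per RoI
assert bbox_pred.shape == (2, 7)  # (x, y, z, w, l, h, alpha) deltas per RoI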
def loss(self, cls_score: torch.Tensor, bbox_pred: torch.Tensor,
rois: torch.Tensor, labels: torch.Tensor,
bbox_targets: torch.Tensor, pos_gt_bboxes: torch.Tensor,
reg_mask: torch.Tensor, label_weights: torch.Tensor,
bbox_weights: torch.Tensor) -> Dict:
"""Coumputing losses.
Args:
cls_score (torch.Tensor): Scores of each roi.
bbox_pred (torch.Tensor): Predictions of bboxes.
rois (torch.Tensor): Roi bboxes.
labels (torch.Tensor): Labels of class.
bbox_targets (torch.Tensor): Target of positive bboxes.
pos_gt_bboxes (torch.Tensor): Ground truths of positive bboxes.
reg_mask (torch.Tensor): Mask for positive bboxes.
label_weights (torch.Tensor): Weights of class loss.
bbox_weights (torch.Tensor): Weights of bbox loss.
Returns:
dict: Computed losses.
- loss_cls (torch.Tensor): Loss of classes.
- loss_bbox (torch.Tensor): Loss of bboxes.
- loss_corner (torch.Tensor): Loss of corners.
"""
losses = dict()
rcnn_batch_size = cls_score.shape[0]
# calculate class loss
cls_flat = cls_score.view(-1)
loss_cls = self.loss_cls(cls_flat, labels, label_weights)
losses['loss_cls'] = loss_cls
# calculate regression loss
code_size = self.bbox_coder.code_size
pos_inds = (reg_mask > 0)
if pos_inds.any() == 0:
# fake a part loss
losses['loss_bbox'] = 0 * bbox_pred.sum()
if self.with_corner_loss:
losses['loss_corner'] = 0 * bbox_pred.sum()
else:
pos_bbox_pred = bbox_pred.view(rcnn_batch_size, -1)[pos_inds]
bbox_weights_flat = bbox_weights[pos_inds].view(-1, 1).repeat(
1, pos_bbox_pred.shape[-1])
loss_bbox = self.loss_bbox(
pos_bbox_pred.unsqueeze(dim=0), bbox_targets.unsqueeze(dim=0),
bbox_weights_flat.unsqueeze(dim=0))
losses['loss_bbox'] = loss_bbox
if self.with_corner_loss:
pos_roi_boxes3d = rois[..., 1:].view(-1, code_size)[pos_inds]
pos_roi_boxes3d = pos_roi_boxes3d.view(-1, code_size)
batch_anchors = pos_roi_boxes3d.clone().detach()
pos_rois_rotation = pos_roi_boxes3d[..., 6].view(-1)
roi_xyz = pos_roi_boxes3d[..., 0:3].view(-1, 3)
batch_anchors[..., 0:3] = 0
# decode boxes
pred_boxes3d = self.bbox_coder.decode(
batch_anchors,
pos_bbox_pred.view(-1, code_size)).view(-1, code_size)
pred_boxes3d[..., 0:3] = rotation_3d_in_axis(
pred_boxes3d[..., 0:3].unsqueeze(1),
pos_rois_rotation,
axis=2).squeeze(1)
pred_boxes3d[:, 0:3] += roi_xyz
# calculate corner loss
loss_corner = self.get_corner_loss_lidar(
pred_boxes3d, pos_gt_bboxes)
losses['loss_corner'] = loss_corner.mean()
return losses
def get_targets(self,
sampling_results: SamplingResult,
rcnn_train_cfg: dict,
concat: bool = True) -> Tuple[torch.Tensor]:
"""Generate targets.
Args:
sampling_results (list[:obj:`SamplingResult`]):
Sampled results from rois.
rcnn_train_cfg (:obj:`ConfigDict`): Training config of rcnn.
concat (bool): Whether to concatenate targets between batches.
Returns:
tuple[torch.Tensor]: Targets of boxes and class prediction.
"""
pos_bboxes_list = [res.pos_bboxes for res in sampling_results]
pos_gt_bboxes_list = [res.pos_gt_bboxes for res in sampling_results]
iou_list = [res.iou for res in sampling_results]
targets = multi_apply(
self._get_target_single,
pos_bboxes_list,
pos_gt_bboxes_list,
iou_list,
cfg=rcnn_train_cfg)
(label, bbox_targets, pos_gt_bboxes, reg_mask, label_weights,
bbox_weights) = targets
if concat:
label = torch.cat(label, 0)
bbox_targets = torch.cat(bbox_targets, 0)
pos_gt_bboxes = torch.cat(pos_gt_bboxes, 0)
reg_mask = torch.cat(reg_mask, 0)
label_weights = torch.cat(label_weights, 0)
label_weights /= torch.clamp(label_weights.sum(), min=1.0)
bbox_weights = torch.cat(bbox_weights, 0)
bbox_weights /= torch.clamp(bbox_weights.sum(), min=1.0)
return (label, bbox_targets, pos_gt_bboxes, reg_mask, label_weights,
bbox_weights)
def _get_target_single(self, pos_bboxes: torch.Tensor,
pos_gt_bboxes: torch.Tensor, ious: torch.Tensor,
cfg: dict) -> Tuple[torch.Tensor]:
"""Generate training targets for a single sample.
Args:
pos_bboxes (torch.Tensor): Positive boxes with shape
(N, 7).
pos_gt_bboxes (torch.Tensor): Ground truth boxes with shape
(M, 7).
ious (torch.Tensor): IoU between `pos_bboxes` and `pos_gt_bboxes`
in shape (N, M).
cfg (dict): Training configs.
Returns:
tuple[torch.Tensor]: Target for positive boxes.
(label, bbox_targets, pos_gt_bboxes, reg_mask, label_weights,
bbox_weights)
"""
cls_pos_mask = ious > cfg.cls_pos_thr
cls_neg_mask = ious < cfg.cls_neg_thr
interval_mask = (cls_pos_mask == 0) & (cls_neg_mask == 0)
# iou regression target
label = (cls_pos_mask > 0).float()
label[interval_mask] = ious[interval_mask] * 2 - 0.5
# label weights
label_weights = (label >= 0).float()
# box regression target
reg_mask = pos_bboxes.new_zeros(ious.size(0)).long()
reg_mask[0:pos_gt_bboxes.size(0)] = 1
bbox_weights = (reg_mask > 0).float()
if reg_mask.bool().any():
pos_gt_bboxes_ct = pos_gt_bboxes.clone().detach()
roi_center = pos_bboxes[..., 0:3]
roi_ry = pos_bboxes[..., 6] % (2 * np.pi)
# canonical transformation
pos_gt_bboxes_ct[..., 0:3] -= roi_center
pos_gt_bboxes_ct[..., 6] -= roi_ry
pos_gt_bboxes_ct[..., 0:3] = rotation_3d_in_axis(
pos_gt_bboxes_ct[..., 0:3].unsqueeze(1), -roi_ry,
axis=2).squeeze(1)
# flip orientation if rois have opposite orientation
ry_label = pos_gt_bboxes_ct[..., 6] % (2 * np.pi) # 0 ~ 2pi
opposite_flag = (ry_label > np.pi * 0.5) & (ry_label < np.pi * 1.5)
ry_label[opposite_flag] = (ry_label[opposite_flag] + np.pi) % (
2 * np.pi) # (0 ~ pi/2, 3pi/2 ~ 2pi)
flag = ry_label > np.pi
ry_label[flag] = ry_label[flag] - np.pi * 2 # (-pi/2, pi/2)
ry_label = torch.clamp(ry_label, min=-np.pi / 2, max=np.pi / 2)
pos_gt_bboxes_ct[..., 6] = ry_label
rois_anchor = pos_bboxes.clone().detach()
rois_anchor[:, 0:3] = 0
rois_anchor[:, 6] = 0
bbox_targets = self.bbox_coder.encode(rois_anchor,
pos_gt_bboxes_ct)
else:
# no fg bbox
bbox_targets = pos_gt_bboxes.new_empty((0, 7))
return (label, bbox_targets, pos_gt_bboxes, reg_mask, label_weights,
bbox_weights)
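A quick numeric illustration of the soft-label rule above; the thresholds are hypothetical (typical PV-RCNN configs use values around cls_pos_thr=0.75 and cls_neg_thr=0.25):

import torch

ious = torch.tensor([0.9, 0.5, 0.1])
cls_pos_thr, cls_neg_thr = 0.75, 0.25
label = (ious > cls_pos_thr).float()
interval = (ious <= cls_pos_thr) & (ious >= cls_neg_thr)
label[interval] = ious[interval] * 2 - 0.5
# label -> tensor([1.0, 0.5, 0.0]): confident foreground, soft target, background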
def get_corner_loss_lidar(self,
pred_bbox3d: torch.Tensor,
gt_bbox3d: torch.Tensor,
delta: float = 1.0) -> torch.Tensor:
"""Calculate corner loss of given boxes.
Args:
pred_bbox3d (torch.FloatTensor): Predicted boxes in shape (N, 7).
gt_bbox3d (torch.FloatTensor): Ground truth boxes in shape (N, 7).
delta (float, optional): Huber loss threshold. Defaults to 1.0.
Returns:
torch.FloatTensor: Calculated corner loss in shape (N).
"""
assert pred_bbox3d.shape[0] == gt_bbox3d.shape[0]
# This is a little bit hack here because we assume the box for
# Part-A2 is in LiDAR coordinates
gt_boxes_structure = LiDARInstance3DBoxes(gt_bbox3d)
pred_box_corners = LiDARInstance3DBoxes(pred_bbox3d).corners
gt_box_corners = gt_boxes_structure.corners
# This flip only changes the heading direction of GT boxes
gt_bbox3d_flip = gt_boxes_structure.clone()
gt_bbox3d_flip.tensor[:, 6] += np.pi
gt_box_corners_flip = gt_bbox3d_flip.corners
corner_dist = torch.min(
torch.norm(pred_box_corners - gt_box_corners, dim=2),
torch.norm(pred_box_corners - gt_box_corners_flip,
dim=2)) # (N, 8)
# huber loss
abs_error = torch.abs(corner_dist)
corner_loss = torch.where(abs_error < delta,
0.5 * abs_error**2 / delta,
abs_error - 0.5 * delta)
return corner_loss.mean(dim=1)
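As a sanity check of the heading-flip handling above (a hedged sketch reusing the head construction from the earlier example): a prediction whose yaw differs from the ground truth by exactly pi still yields zero corner loss, because the distance is also measured against the pi-rotated copy of the ground truth.

import numpy as np
import torch

from mmdet3d.models.roi_heads.bbox_heads import PVRCNNBBoxHead

head = PVRCNNBBoxHead(in_channels=128, grid_size=6, num_classes=3)
gt = torch.tensor([[0., 0., 0., 4., 2., 1.5, 0.3]])  # (x, y, z, w, l, h, yaw)
pred_flipped = gt.clone()
pred_flipped[:, 6] += np.pi
print(head.get_corner_loss_lidar(gt, gt))            # tensor([0.])
print(head.get_corner_loss_lidar(pred_flipped, gt))  # tensor([0.])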
def get_results(self,
rois: torch.Tensor,
cls_preds: torch.Tensor,
bbox_reg: torch.Tensor,
class_labels: torch.Tensor,
input_metas: List[dict],
test_cfg: dict = None) -> InstanceList:
"""Generate bboxes from bbox head predictions.
Args:
rois (torch.Tensor): Roi bounding boxes.
cls_preds (torch.Tensor): Scores of bounding boxes.
bbox_reg (torch.Tensor): Bounding box predictions.
class_labels (torch.Tensor): Labels of classes.
input_metas (list[dict]): Point cloud meta info.
test_cfg (:obj:`ConfigDict`): Testing config.
Returns:
list[:obj:`InstanceData`]: Detection results of each sample
after the post process.
Each item usually contains following keys.
- scores_3d (Tensor): Classification scores, has a shape
(num_instances, )
- labels_3d (Tensor): Labels of bboxes, has a shape
(num_instances, ).
- bboxes_3d (BaseInstance3DBoxes): Prediction of bboxes,
contains a tensor with shape (num_instances, C), where
C >= 7.
"""
roi_batch_id = rois[..., 0]
roi_boxes = rois[..., 1:] # boxes without batch id
batch_size = int(roi_batch_id.max().item() + 1)
# decode boxes
roi_ry = roi_boxes[..., 6].view(-1)
roi_xyz = roi_boxes[..., 0:3].view(-1, 3)
local_roi_boxes = roi_boxes.clone().detach()
local_roi_boxes[..., 0:3] = 0
batch_box_preds = self.bbox_coder.decode(local_roi_boxes, bbox_reg)
batch_box_preds[..., 0:3] = rotation_3d_in_axis(
batch_box_preds[..., 0:3].unsqueeze(1), roi_ry, axis=2).squeeze(1)
batch_box_preds[:, 0:3] += roi_xyz
# post processing
result_list = []
for batch_id in range(batch_size):
cur_cls_preds = cls_preds[roi_batch_id == batch_id]
box_preds = batch_box_preds[roi_batch_id == batch_id]
label_preds = class_labels[batch_id]
cur_cls_preds = cur_cls_preds.sigmoid()
cur_cls_preds, _ = torch.max(cur_cls_preds, dim=-1)
selected = self.class_agnostic_nms(
scores=cur_cls_preds,
bbox_preds=box_preds,
input_meta=input_metas[batch_id],
nms_cfg=test_cfg)
selected_bboxes = box_preds[selected]
selected_label_preds = label_preds[selected]
selected_scores = cur_cls_preds[selected]
results = InstanceData()
results.bboxes_3d = input_metas[batch_id]['box_type_3d'](
selected_bboxes, self.bbox_coder.code_size)
results.scores_3d = selected_scores
results.labels_3d = selected_label_preds
result_list.append(results)
return result_list
def class_agnostic_nms(self, scores: torch.Tensor,
bbox_preds: torch.Tensor, nms_cfg: dict,
input_meta: dict) -> Tuple[torch.Tensor]:
"""Class agnostic NMS for box head.
Args:
scores (torch.Tensor): Object score of bounding boxes.
bbox_preds (torch.Tensor): Predicted bounding boxes.
nms_cfg (dict): NMS config dict.
input_meta (dict): Contain pcd and img's meta info.
Returns:
tuple[torch.Tensor]: Bounding boxes, scores and labels.
"""
obj_scores = scores.clone()
if nms_cfg.use_rotate_nms:
nms_func = nms_bev
else:
nms_func = nms_normal_bev
bbox = input_meta['box_type_3d'](
bbox_preds.clone(),
box_dim=bbox_preds.shape[-1],
with_yaw=True,
origin=(0.5, 0.5, 0.5))
if nms_cfg.score_thr is not None:
scores_mask = (obj_scores >= nms_cfg.score_thr)
obj_scores = obj_scores[scores_mask]
bbox = bbox[scores_mask]
selected = []
if obj_scores.shape[0] > 0:
box_scores_nms, indices = torch.topk(
obj_scores, k=min(4096, obj_scores.shape[0]))
bbox_bev = bbox.bev[indices]
bbox_for_nms = xywhr2xyxyr(bbox_bev)
keep = nms_func(bbox_for_nms, box_scores_nms, nms_cfg.nms_thr)
selected = indices[keep]
if nms_cfg.score_thr is not None:
original_idxs = scores_mask.nonzero().view(-1)
selected = original_idxs[selected]
return selected
# Copyright (c) OpenMMLab. All rights reserved.
from .foreground_segmentation_head import ForegroundSegmentationHead
from .pointwise_semantic_head import PointwiseSemanticHead
from .primitive_head import PrimitiveHead

__all__ = [
'PointwiseSemanticHead', 'PrimitiveHead', 'ForegroundSegmentationHead'
]
# Copyright (c) OpenMMLab. All rights reserved.
from typing import Dict, Optional, Tuple
import torch
from mmcv.cnn.bricks import build_norm_layer
from mmdet.models.utils import multi_apply
from mmengine.model import BaseModule
from mmengine.structures import InstanceData
from torch import nn as nn
from mmdet3d.models.builder import build_loss
from mmdet3d.registry import MODELS
from mmdet3d.utils import InstanceList
@MODELS.register_module()
class ForegroundSegmentationHead(BaseModule):
"""Foreground segmentation head.
Args:
in_channels (int): The number of input channel.
mlp_channels (tuple[int]): MLP channels. Defaults
to (256, 256).
extra_width (float): Width by which boxes are enlarged.
Defaults to 0.1.
norm_cfg (dict): Type of normalization method. Defaults to
dict(type='BN1d', eps=1e-5, momentum=0.1).
init_cfg (dict, optional): Initialization config of the
model. Defaults to None.
loss_seg (dict): Config of segmentation loss. Defaults to
dict(type='mmdet.FocalLoss').
"""
def __init__(
self,
in_channels: int,
mlp_channels: Tuple[int] = (256, 256),
extra_width: float = 0.1,
norm_cfg: dict = dict(type='BN1d', eps=1e-5, momentum=0.1),
init_cfg: Optional[dict] = None,
loss_seg: dict = dict(
type='mmdet.FocalLoss',
use_sigmoid=True,
reduction='sum',
gamma=2.0,
alpha=0.25,
activated=True,
loss_weight=1.0)
) -> None:
super(ForegroundSegmentationHead, self).__init__(init_cfg=init_cfg)
self.extra_width = extra_width
self.num_classes = 1
self.in_channels = in_channels
self.use_sigmoid_cls = loss_seg.get('use_sigmoid', False)
out_channels = 1
if self.use_sigmoid_cls:
self.out_channels = out_channels
else:
self.out_channels = out_channels + 1
mlps_layers = []
cin = in_channels
for mlp in mlp_channels:
mlps_layers.extend([
nn.Linear(cin, mlp, bias=False),
build_norm_layer(norm_cfg, mlp)[1],
nn.ReLU()
])
cin = mlp
mlps_layers.append(nn.Linear(cin, self.out_channels, bias=True))
self.seg_cls_layer = nn.Sequential(*mlps_layers)
self.loss_seg = build_loss(loss_seg)
def forward(self, feats: torch.Tensor) -> dict:
"""Forward head.
Args:
feats (torch.Tensor): Point-wise features.
Returns:
dict: Segment predictions.
"""
seg_preds = self.seg_cls_layer(feats)
return dict(seg_preds=seg_preds)
def _get_targets_single(self, point_xyz: torch.Tensor,
gt_bboxes_3d: InstanceData,
gt_labels_3d: torch.Tensor) -> torch.Tensor:
"""generate segmentation targets for a single sample.
Args:
point_xyz (torch.Tensor): Coordinate of points.
gt_bboxes_3d (:obj:`BaseInstance3DBoxes`): Ground truth boxes in
shape (box_num, 7).
gt_labels_3d (torch.Tensor): Class labels of ground truths in
shape (box_num).
Returns:
torch.Tensor: Points class labels.
"""
point_cls_labels_single = point_xyz.new_zeros(
point_xyz.shape[0]).long()
enlarged_gt_boxes = gt_bboxes_3d.enlarged_box(self.extra_width)
box_idxs_of_pts = gt_bboxes_3d.points_in_boxes_part(point_xyz).long()
extend_box_idxs_of_pts = enlarged_gt_boxes.points_in_boxes_part(
point_xyz).long()
box_fg_flag = box_idxs_of_pts >= 0
fg_flag = box_fg_flag.clone()
ignore_flag = fg_flag ^ (extend_box_idxs_of_pts >= 0)
point_cls_labels_single[ignore_flag] = -1
gt_box_of_fg_points = gt_labels_3d[box_idxs_of_pts[fg_flag]]
point_cls_labels_single[
fg_flag] = 1 if self.num_classes == 1 else\
gt_box_of_fg_points.long()
return point_cls_labels_single,
def get_targets(self, points_bxyz: torch.Tensor,
batch_gt_instances_3d: InstanceList) -> dict:
"""Generate segmentation targets.
Args:
points_bxyz (torch.Tensor): The coordinates of point in shape
(B, num_points, 3).
batch_gt_instances_3d (list[:obj:`InstanceData`]): Batch of
gt_instances. It usually includes ``bboxes_3d`` and
``labels_3d`` attributes.
Returns:
dict: Prediction targets
- seg_targets (torch.Tensor): Segmentation targets.
"""
batch_size = len(batch_gt_instances_3d)
points_xyz_list = []
gt_bboxes_3d = []
gt_labels_3d = []
for idx in range(batch_size):
coords_idx = points_bxyz[:, 0] == idx
points_xyz_list.append(points_bxyz[coords_idx][..., 1:])
gt_bboxes_3d.append(batch_gt_instances_3d[idx].bboxes_3d)
gt_labels_3d.append(batch_gt_instances_3d[idx].labels_3d)
seg_targets, = multi_apply(self._get_targets_single, points_xyz_list,
gt_bboxes_3d, gt_labels_3d)
seg_targets = torch.cat(seg_targets, dim=0)
return dict(seg_targets=seg_targets)
def loss(self, semantic_results: dict,
semantic_targets: dict) -> Dict[str, torch.Tensor]:
"""Calculate point-wise segmentation losses.
Args:
semantic_results (dict): Results from semantic head.
semantic_targets (dict): Targets of semantic results.
Returns:
dict: Loss of segmentation.
- loss_semantic (torch.Tensor): Segmentation prediction loss.
"""
seg_preds = semantic_results['seg_preds']
seg_targets = semantic_targets['seg_targets']
positives = (seg_targets > 0)
negative_cls_weights = (seg_targets == 0).float()
seg_weights = (negative_cls_weights + 1.0 * positives).float()
pos_normalizer = positives.sum(dim=0).float()
seg_weights /= torch.clamp(pos_normalizer, min=1.0)
seg_preds = torch.sigmoid(seg_preds)
loss_seg = self.loss_seg(seg_preds, (~positives).long(), seg_weights)
return dict(loss_semantic=loss_seg)
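A minimal forward sketch (hypothetical channel count; building the head assumes mmdet's FocalLoss is available in the registry, since loss_seg is built in __init__):

import torch

from mmdet3d.models.roi_heads.mask_heads import ForegroundSegmentationHead

head = ForegroundSegmentationHead(in_channels=640)
point_feats = torch.rand(1000, 640)  # per-point features
out = head(point_feats)
assert out['seg_preds'].shape == (1000, 1)  # one foreground logit per point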
@@ -2,6 +2,7 @@
from typing import Dict, Optional, Tuple

import torch
from mmdet.models.utils import multi_apply
from mmengine.model import BaseModule
from torch import Tensor
from torch import nn as nn
@@ -10,7 +11,6 @@ from torch.nn import functional as F
from mmdet3d.registry import MODELS
from mmdet3d.structures.bbox_3d import BaseInstance3DBoxes, rotation_3d_in_axis
from mmdet3d.utils import InstanceList


@MODELS.register_module()
......