"vscode:/vscode.git/clone" did not exist on "5489406c4a06780c23357880588f807a5f2f52e7"
Unverified Commit d7067e44 authored by Wenwei Zhang, committed by GitHub

Bump version to v1.1.0rc2

Bump to v1.1.0rc2
parents 28fe73d2 fb0e57e5
# Copyright (c) OpenMMLab. All rights reserved.
from mmdet.models.losses import FocalLoss, SmoothL1Loss, binary_cross_entropy
from .axis_aligned_iou_loss import AxisAlignedIoULoss, axis_aligned_iou_loss
from .chamfer_distance import ChamferDistance, chamfer_distance
from .multibin_loss import MultiBinLoss
from .paconv_regularization_loss import PAConvRegularizationLoss
from .rotated_iou_loss import RotatedIoU3DLoss, rotated_iou_3d_loss
from .uncertain_smooth_l1_loss import UncertainL1Loss, UncertainSmoothL1Loss
__all__ = [
'FocalLoss', 'SmoothL1Loss', 'binary_cross_entropy', 'ChamferDistance',
'chamfer_distance', 'axis_aligned_iou_loss', 'AxisAlignedIoULoss',
'PAConvRegularizationLoss', 'UncertainL1Loss', 'UncertainSmoothL1Loss',
'MultiBinLoss', 'RotatedIoU3DLoss', 'rotated_iou_3d_loss'
]
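As a quick orientation, the sketch below shows how one of the losses exported above can be used directly. It is only illustrative: it assumes mmdet3d is installed with a CUDA-enabled mmcv (RotatedIoU3DLoss relies on the diff_iou_rotated_3d op), and the tensor shapes and values are made up.

import torch
from mmdet3d.models.losses import RotatedIoU3DLoss

iou_loss = RotatedIoU3DLoss(reduction='mean', loss_weight=1.0)
pred = torch.rand(4, 7).cuda()    # (x, y, z, w, l, h, alpha), illustrative values
target = torch.rand(4, 7).cuda()
loss = iou_loss(pred, target)     # scalar tensor: loss_weight * mean(1 - IoU)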
# Copyright (c) OpenMMLab. All rights reserved.
import torch
from mmdet.models.losses.utils import weighted_loss
from torch import nn as nn
from mmdet3d.registry import MODELS
from mmdet3d.structures import AxisAlignedBboxOverlaps3D
@weighted_loss
......
# Copyright (c) OpenMMLab. All rights reserved.
import torch
from mmdet.models.losses.utils import weighted_loss
from torch import nn as nn
from torch.nn import functional as F
from mmdet3d.registry import MODELS
@weighted_loss
......
# Copyright (c) OpenMMLab. All rights reserved.
import torch
from mmdet.models.losses.utils import weight_reduce_loss
from torch import nn as nn
from mmdet3d.registry import MODELS
from ..layers import PAConv, PAConvCUDA
......
# Copyright (c) OpenMMLab. All rights reserved.
from typing import Optional
import torch
from mmcv.ops import diff_iou_rotated_3d
from mmdet.models.losses.utils import weighted_loss
from torch import Tensor
from torch import nn as nn
from mmdet3d.registry import MODELS
@weighted_loss
def rotated_iou_3d_loss(pred: Tensor, target: Tensor) -> Tensor:
"""Calculate the IoU loss (1-IoU) of two sets of rotated bounding boxes.
Note that predictions and targets are matched one-to-one.
Args:
pred (torch.Tensor): Bbox predictions with shape [N, 7]
(x, y, z, w, l, h, alpha).
target (torch.Tensor): Bbox targets (gt) with shape [N, 7]
(x, y, z, w, l, h, alpha).
Returns:
torch.Tensor: IoU loss between predictions and targets.
"""
iou_loss = 1 - diff_iou_rotated_3d(pred.unsqueeze(0),
target.unsqueeze(0))[0]
return iou_loss
@MODELS.register_module()
class RotatedIoU3DLoss(nn.Module):
"""Calculate the IoU loss (1-IoU) of rotated bounding boxes.
Args:
reduction (str): Method to reduce losses.
The valid reduction methods are 'none', 'sum' or 'mean'.
loss_weight (float, optional): Weight of loss. Defaults to 1.0.
"""
def __init__(self,
reduction: str = 'mean',
loss_weight: Optional[float] = 1.0):
super().__init__()
self.reduction = reduction
self.loss_weight = loss_weight
def forward(self,
pred: Tensor,
target: Tensor,
weight: Optional[Tensor] = None,
avg_factor: Optional[int] = None,
reduction_override: Optional[str] = None,
**kwargs) -> Tensor:
"""Forward function of loss calculation.
Args:
pred (torch.Tensor): Bbox predictions with shape [..., 7]
(x, y, z, w, l, h, alpha).
target (torch.Tensor): Bbox targets (gt) with shape [..., 7]
(x, y, z, w, l, h, alpha).
weight (torch.Tensor | float, optional): Weight of loss.
Defaults to None.
avg_factor (int, optional): Average factor that is used to average
the loss. Defaults to None.
reduction_override (str, optional): Method to reduce losses.
The valid reduction methods are 'none', 'sum' or 'mean'.
Defaults to None.
Returns:
torch.Tensor: IoU loss between predictions and targets.
"""
if weight is not None and not torch.any(weight > 0):
return pred.sum() * weight.sum() # 0
assert reduction_override in (None, 'none', 'mean', 'sum')
reduction = (
reduction_override if reduction_override else self.reduction)
if weight is not None and weight.dim() > 1:
weight = weight.mean(-1)
loss = self.loss_weight * rotated_iou_3d_loss(
pred,
target,
weight,
reduction=reduction,
avg_factor=avg_factor,
**kwargs)
return loss
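The @weighted_loss decorator used by the losses in this commit factors the weight/reduction/avg_factor handling out of each loss function. The following is a simplified, self-contained stand-in (not the mmdet implementation) that illustrates the pattern seen in rotated_iou_3d_loss and RotatedIoU3DLoss.forward above.

import functools

import torch


def weighted_loss_sketch(loss_func):
    # Wrap an element-wise loss so it accepts weight/reduction/avg_factor.
    @functools.wraps(loss_func)
    def wrapper(pred, target, weight=None, reduction='mean',
                avg_factor=None, **kwargs):
        loss = loss_func(pred, target, **kwargs)
        if weight is not None:
            loss = loss * weight
        if reduction == 'sum':
            return loss.sum()
        if reduction == 'mean':
            denom = avg_factor if avg_factor is not None else loss.numel()
            return loss.sum() / denom
        return loss  # reduction == 'none'
    return wrapper


@weighted_loss_sketch
def l1_loss(pred, target):
    # Only the element-wise loss lives here, as with @weighted_loss above.
    return torch.abs(pred - target)


pred, target = torch.rand(4, 7), torch.rand(4, 7)
print(l1_loss(pred, target, weight=torch.ones(4, 7), reduction='mean'))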
# Copyright (c) OpenMMLab. All rights reserved.
import torch
from mmdet.models.losses.utils import weighted_loss
from torch import nn as nn
from mmdet3d.registry import MODELS
@weighted_loss
......
......@@ -2,7 +2,9 @@
from .pillar_scatter import PointPillarsScatter
from .sparse_encoder import SparseEncoder, SparseEncoderSASSD
from .sparse_unet import SparseUNet
from .voxel_set_abstraction import VoxelSetAbstraction
__all__ = [
'PointPillarsScatter', 'SparseEncoder', 'SparseEncoderSASSD', 'SparseUNet',
'VoxelSetAbstraction'
]
# Copyright (c) OpenMMLab. All rights reserved.
from typing import List, Tuple
import torch
from mmcv.ops import points_in_boxes_all, three_interpolate, three_nn
from mmdet.models.losses import sigmoid_focal_loss, smooth_l1_loss
from torch import Tensor
from torch import nn as nn
from mmdet3d.models.layers import SparseBasicBlock, make_sparse_convmodule
from mmdet3d.models.layers.spconv import IS_SPCONV2_AVAILABLE
from mmdet3d.registry import MODELS
from mmdet3d.structures import BaseInstance3DBoxes
if IS_SPCONV2_AVAILABLE:
from spconv.pytorch import SparseConvTensor, SparseSequential
......@@ -37,6 +41,8 @@ class SparseEncoder(nn.Module):
Defaults to ((1, ), (1, 1, 1), (1, 1, 1), ((0, 1, 1), 1, 1)).
block_type (str, optional): Type of the block to use.
Defaults to 'conv_module'.
return_middle_feats (bool): Whether to output middle features.
Defaults to False.
"""
def __init__(self,
......@@ -50,7 +56,8 @@ class SparseEncoder(nn.Module):
64)),
encoder_paddings=((1, ), (1, 1, 1), (1, 1, 1), ((0, 1, 1), 1,
1)),
block_type='conv_module',
return_middle_feats=False):
super().__init__()
assert block_type in ['conv_module', 'basicblock']
self.sparse_shape = sparse_shape
......@@ -62,6 +69,7 @@ class SparseEncoder(nn.Module):
self.encoder_paddings = encoder_paddings
self.stage_num = len(self.encoder_channels)
self.fp16_enabled = False
self.return_middle_feats = return_middle_feats
# Spconv init all weight on its own
assert isinstance(order, tuple) and len(order) == 3
......@@ -113,7 +121,14 @@ class SparseEncoder(nn.Module):
batch_size (int): Batch size.
Returns:
torch.Tensor | tuple[torch.Tensor, list]: The returned spatial
features include:
- spatial_features (torch.Tensor): Spatial features output by
the last layer.
- encode_features (List[SparseConvTensor], optional): Middle-layer
output features, returned only when self.return_middle_feats
is True.
"""
coors = coors.int()
input_sp_tensor = SparseConvTensor(voxel_features, coors,
......@@ -133,7 +148,10 @@ class SparseEncoder(nn.Module):
N, C, D, H, W = spatial_features.shape
spatial_features = spatial_features.view(N, C * D, H, W)
if self.return_middle_feats:
return spatial_features, encode_features
else:
return spatial_features
def make_encoder_layers(self,
make_block,
......@@ -238,17 +256,17 @@ class SparseEncoderSASSD(SparseEncoder):
"""
def __init__(self,
in_channels: int,
sparse_shape: List[int],
order: Tuple[str] = ('conv', 'norm', 'act'),
norm_cfg: dict = dict(type='BN1d', eps=1e-3, momentum=0.01),
base_channels: int = 16,
output_channels: int = 128,
encoder_channels: Tuple[tuple] = ((16, ), (32, 32, 32),
(64, 64, 64), (64, 64, 64)),
encoder_paddings: Tuple[tuple] = ((1, ), (1, 1, 1), (1, 1, 1),
((0, 1, 1), 1, 1)),
block_type: str = 'conv_module'):
super(SparseEncoderSASSD, self).__init__(
in_channels=in_channels,
sparse_shape=sparse_shape,
......@@ -264,7 +282,11 @@ class SparseEncoderSASSD(SparseEncoder):
self.point_cls = nn.Linear(64, 1, bias=False)
self.point_reg = nn.Linear(64, 3, bias=False)
def forward(self,
voxel_features: Tensor,
coors: Tensor,
batch_size: int,
test_mode: bool = False) -> Tuple[Tensor, tuple]:
"""Forward of SparseEncoder.
Args:
......@@ -276,7 +298,7 @@ class SparseEncoderSASSD(SparseEncoder):
Defaults to False.
Returns:
Tensor: Backbone features.
tuple[torch.Tensor]: Mean feature value of the points,
Classification result of the points,
Regression offsets of the points.
......@@ -333,14 +355,17 @@ class SparseEncoderSASSD(SparseEncoder):
return spatial_features, point_misc
def get_auxiliary_targets(self,
points_feats: Tensor,
gt_bboxes_3d: List[BaseInstance3DBoxes],
enlarge: float = 1.0) -> Tuple[Tensor, Tensor]:
"""Get auxiliary target.
Args:
points_feats (torch.Tensor): Mean features of the points.
gt_bboxes_3d (list[:obj:`BaseInstance3DBoxes`]): Ground truth
boxes for each sample.
enlarge (float, optional): Enlarged scale. Defaults to 1.0.
Returns:
tuple[torch.Tensor]: Label of the points and
......@@ -348,31 +373,32 @@ class SparseEncoderSASSD(SparseEncoder):
"""
center_offsets = list()
pts_labels = list()
for i in range(len(gt_bboxes_3d)):
boxes3d = gt_bboxes_3d[i].tensor.detach().clone()
idx = torch.nonzero(points_feats[:, 0] == i).view(-1)
point_xyz = points_feats[idx, 1:].detach().clone()
boxes3d[:, 3:6] *= enlarge
pts_in_flag, center_offset = self.calculate_pts_offsets(
point_xyz, boxes3d)
pts_label = pts_in_flag.max(0)[0].byte()
pts_labels.append(pts_label)
center_offsets.append(center_offset)
center_offsets = torch.cat(center_offsets)
pts_labels = torch.cat(pts_labels).to(center_offsets.device)
return pts_labels, center_offsets
def calculate_pts_offsets(self, points: Tensor,
bboxes_3d: Tensor) -> Tuple[Tensor, Tensor]:
"""Find all boxes in which each point is, as well as the offsets from
the box centers.
Args:
points (torch.Tensor): [M, 3], [x, y, z] in LiDAR coordinate
bboxes_3d (torch.Tensor): [T, 7],
num_valid_boxes <= T, [x, y, z, x_size, y_size, z_size, rz],
(x, y, z) is the bottom center.
......@@ -383,44 +409,41 @@ class SparseEncoderSASSD(SparseEncoder):
if it belongs to the box, with the shape of (M, 3).
Default background = 0.
"""
boxes_num = len(bboxes_3d)
pts_num = len(points)
box_indices = points_in_boxes_all(points[None, ...], bboxes_3d[None,
...])
pts_indices = box_indices.squeeze(0).transpose(0, 1)
center_offsets = torch.zeros_like(points).to(points.device)
for i in range(boxes_num):
for j in range(pts_num):
if pts_indices[i][j] == 1:
center_offsets[j][0] = points[j][0] - bboxes_3d[i][0]
center_offsets[j][1] = points[j][1] - bboxes_3d[i][1]
center_offsets[j][2] = (
points[j][2] -
(bboxes_3d[i][2] + bboxes_3d[i][2] / 2.0))
return pts_indices, center_offsets
def aux_loss(self, points: Tensor, point_cls: Tensor, point_reg: Tensor,
gt_bboxes_3d: Tensor) -> dict:
"""Calculate auxiliary loss.
Args:
points (torch.Tensor): Mean feature value of the points.
point_cls (torch.Tensor): Classification result of the points.
point_reg (torch.Tensor): Regression offsets of the points.
gt_bboxes_3d (list[:obj:`BaseInstance3DBoxes`]): Ground truth
boxes for each sample.
Returns:
dict: Auxiliary loss.
"""
num_boxes = len(gt_bboxes_3d)
pts_labels, center_targets = self.get_auxiliary_targets(
points, gt_bboxes_3d)
rpn_cls_target = pts_labels.long()
pos = (pts_labels > 0).float()
......@@ -449,11 +472,13 @@ class SparseEncoderSASSD(SparseEncoder):
return dict(aux_loss_cls=aux_loss_cls, aux_loss_reg=aux_loss_reg)
def make_auxiliary_points(
self,
source_tensor: Tensor,
target: Tensor,
offset: Tuple = (0., -40., -3.),
voxel_size: Tuple = (.05, .05, .1)
) -> Tensor:
"""Make auxiliary points for loss computation.
Args:
......
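The main user-facing change to SparseEncoder in this diff is the return_middle_feats flag. A hedged construction sketch (assumes mmdet3d with spconv is installed; in_channels and sparse_shape below are illustrative KITTI-like values, not taken from a real config):

from mmdet3d.models.middle_encoders import SparseEncoder

encoder = SparseEncoder(
    in_channels=4,
    sparse_shape=[41, 1600, 1408],
    return_middle_feats=True)
# spatial_features, encode_features = encoder(voxel_features, coors, batch_size)
# With return_middle_feats=False (the default), only spatial_features is returned.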
# Copyright (c) OpenMMLab. All rights reserved.
from typing import List, Optional
import mmengine
import torch
import torch.nn as nn
from mmcv.cnn import ConvModule
from mmcv.ops.furthest_point_sample import furthest_point_sample
from mmengine.model import BaseModule
from mmdet3d.registry import MODELS
from mmdet3d.utils import InstanceList
def bilinear_interpolate_torch(inputs, x, y):
"""Bilinear interpolate for inputs."""
x0 = torch.floor(x).long()
x1 = x0 + 1
y0 = torch.floor(y).long()
y1 = y0 + 1
x0 = torch.clamp(x0, 0, inputs.shape[1] - 1)
x1 = torch.clamp(x1, 0, inputs.shape[1] - 1)
y0 = torch.clamp(y0, 0, inputs.shape[0] - 1)
y1 = torch.clamp(y1, 0, inputs.shape[0] - 1)
Ia = inputs[y0, x0]
Ib = inputs[y1, x0]
Ic = inputs[y0, x1]
Id = inputs[y1, x1]
wa = (x1.type_as(x) - x) * (y1.type_as(y) - y)
wb = (x1.type_as(x) - x) * (y - y0.type_as(y))
wc = (x - x0.type_as(x)) * (y1.type_as(y) - y)
wd = (x - x0.type_as(x)) * (y - y0.type_as(y))
ans = torch.t((torch.t(Ia) * wa)) + torch.t(torch.t(Ib) * wb) + torch.t(
torch.t(Ic) * wc) + torch.t(torch.t(Id) * wd)
return ans
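An illustrative call of bilinear_interpolate_torch as defined above (assuming the function is in scope); the feature map and query coordinates are made up for the example.

import torch

feature_map = torch.arange(12, dtype=torch.float32).view(3, 4, 1)  # (H, W, C)
x = torch.tensor([0.5, 2.25])  # fractional column (width) indices
y = torch.tensor([0.5, 1.75])  # fractional row (height) indices
sampled = bilinear_interpolate_torch(feature_map, x, y)
print(sampled.shape)  # torch.Size([2, 1]): one interpolated feature per query point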
@MODELS.register_module()
class VoxelSetAbstraction(BaseModule):
"""Voxel set abstraction module for PVRCNN and PVRCNN++.
Args:
num_keypoints (int): The number of key points sampled from the
raw point cloud.
fused_out_channel (int): Number of output channels of the fused
key point features. Defaults to 128.
voxel_size (list[float]): Size of voxels. Defaults to
[0.05, 0.05, 0.1].
point_cloud_range (list[float]): Point cloud range. Defaults to
[0, -40, -3, 70.4, 40, 1].
voxel_sa_cfgs_list (List[dict or ConfigDict], optional): List of SA
module configs used to gather key point features from multi-scale
voxel features. Defaults to None.
rawpoints_sa_cfgs (dict or ConfigDict, optional): SA module config
used to gather key point features from the raw points. Defaults
to None.
bev_feat_channel (int): Number of BEV feature channels.
Defaults to 256.
bev_scale_factor (int): BEV feature map scale factor. Defaults to 8.
voxel_center_as_source (bool): Whether to use voxel centers as
key points. Defaults to False.
norm_cfg (dict[str]): Config of normalization layer. Defaults to
dict(type='BN2d', eps=1e-5, momentum=0.1).
bias (bool | str, optional): If specified as 'auto', it will be
decided by norm_cfg: bias is set to True if norm_cfg is None,
otherwise False. Defaults to 'auto'.
"""
def __init__(self,
num_keypoints: int,
fused_out_channel: int = 128,
voxel_size: list = [0.05, 0.05, 0.1],
point_cloud_range: list = [0, -40, -3, 70.4, 40, 1],
voxel_sa_cfgs_list: Optional[list] = None,
rawpoints_sa_cfgs: Optional[dict] = None,
bev_feat_channel: int = 256,
bev_scale_factor: int = 8,
voxel_center_as_source: bool = False,
norm_cfg: dict = dict(type='BN2d', eps=1e-5, momentum=0.1),
bias: str = 'auto') -> None:
super().__init__()
self.num_keypoints = num_keypoints
self.fused_out_channel = fused_out_channel
self.voxel_size = voxel_size
self.point_cloud_range = point_cloud_range
self.voxel_center_as_source = voxel_center_as_source
gathered_channel = 0
if rawpoints_sa_cfgs is not None:
self.rawpoints_sa_layer = MODELS.build(rawpoints_sa_cfgs)
gathered_channel += sum(
[x[-1] for x in rawpoints_sa_cfgs.mlp_channels])
else:
self.rawpoints_sa_layer = None
if voxel_sa_cfgs_list is not None:
self.voxel_sa_configs_list = voxel_sa_cfgs_list
self.voxel_sa_layers = nn.ModuleList()
for voxel_sa_config in voxel_sa_cfgs_list:
cur_layer = MODELS.build(voxel_sa_config)
self.voxel_sa_layers.append(cur_layer)
gathered_channel += sum(
[x[-1] for x in voxel_sa_config.mlp_channels])
else:
self.voxel_sa_layers = None
if bev_feat_channel is not None and bev_scale_factor is not None:
self.bev_cfg = mmengine.Config(
dict(
bev_feat_channels=bev_feat_channel,
bev_scale_factor=bev_scale_factor))
gathered_channel += bev_feat_channel
else:
self.bev_cfg = None
self.point_feature_fusion_layer = nn.Sequential(
ConvModule(
gathered_channel,
fused_out_channel,
kernel_size=(1, 1),
stride=(1, 1),
conv_cfg=dict(type='Conv2d'),
norm_cfg=norm_cfg,
bias=bias))
def interpolate_from_bev_features(self, keypoints: torch.Tensor,
bev_features: torch.Tensor,
batch_size: int,
bev_scale_factor: int) -> torch.Tensor:
"""Gather key points features from bev feature map by interpolate.
Args:
keypoints (torch.Tensor): Sampled key points with shape
(N1 + N2 + ..., NDim).
bev_features (torch.Tensor): Bev feature map from the first
stage with shape (B, C, H, W).
batch_size (int): Input batch size.
bev_scale_factor (int): Bev feature map scale factor.
Returns:
torch.Tensor: Key point features gathered from the BEV feature
map, with shape (N1 + N2 + ..., C).
"""
x_idxs = (keypoints[..., 0] -
self.point_cloud_range[0]) / self.voxel_size[0]
y_idxs = (keypoints[..., 1] -
self.point_cloud_range[1]) / self.voxel_size[1]
x_idxs = x_idxs / bev_scale_factor
y_idxs = y_idxs / bev_scale_factor
point_bev_features_list = []
for k in range(batch_size):
cur_x_idxs = x_idxs[k, ...]
cur_y_idxs = y_idxs[k, ...]
cur_bev_features = bev_features[k].permute(1, 2, 0) # (H, W, C)
point_bev_features = bilinear_interpolate_torch(
cur_bev_features, cur_x_idxs, cur_y_idxs)
point_bev_features_list.append(point_bev_features)
point_bev_features = torch.cat(
point_bev_features_list, dim=0) # (N1 + N2 + ..., C)
return point_bev_features.view(batch_size, keypoints.shape[1], -1)
def get_voxel_centers(self, coors: torch.Tensor,
scale_factor: float) -> torch.Tensor:
"""Get voxel centers coordinate.
Args:
coors (torch.Tensor): Coordinates of voxels shape is Nx(1+NDim),
where 1 represents the batch index.
scale_factor (float): Scale factor.
Returns:
torch.Tensor: Voxel centers coordinate with shape (N, 3).
"""
assert coors.shape[1] == 4
voxel_centers = coors[:, [3, 2, 1]].float() # (xyz)
voxel_size = torch.tensor(
self.voxel_size,
device=voxel_centers.device).float() * scale_factor
pc_range = torch.tensor(
self.point_cloud_range[0:3], device=voxel_centers.device).float()
voxel_centers = (voxel_centers + 0.5) * voxel_size + pc_range
return voxel_centers
def sample_key_points(self, points: List[torch.Tensor],
coors: torch.Tensor) -> torch.Tensor:
"""Sample key points from raw points cloud.
Args:
points (List[torch.Tensor]): Point cloud of each sample.
coors (torch.Tensor): Coordinates of voxels shape is Nx(1+NDim),
where 1 represents the batch index.
Returns:
torch.Tensor: (B, M, 3) Key points of each sample.
M is num_keypoints.
"""
assert points is not None or coors is not None
if self.voxel_center_as_source:
_src_points = self.get_voxel_centers(coors=coors, scale_factor=1)
batch_size = coors[-1, 0].item() + 1
src_points = [
_src_points[coors[:, 0] == b] for b in range(batch_size)
]
else:
src_points = [p[..., :3] for p in points]
keypoints_list = []
for points_to_sample in src_points:
num_points = points_to_sample.shape[0]
cur_pt_idxs = furthest_point_sample(
points_to_sample.unsqueeze(dim=0).contiguous(),
self.num_keypoints).long()[0]
if num_points < self.num_keypoints:
times = int(self.num_keypoints / num_points) + 1
non_empty = cur_pt_idxs[:num_points]
cur_pt_idxs = non_empty.repeat(times)[:self.num_keypoints]
keypoints = points_to_sample[cur_pt_idxs]
keypoints_list.append(keypoints)
keypoints = torch.stack(keypoints_list, dim=0) # (B, M, 3)
return keypoints
def forward(self, batch_inputs_dict: dict, feats_dict: dict,
rpn_results_list: InstanceList) -> dict:
"""Extract point-wise features from multi-input.
Args:
batch_inputs_dict (dict): The model input dict, which includes
'points' and 'voxels' keys.
- points (list[torch.Tensor]): Point cloud of each sample.
- voxels (dict[torch.Tensor]): Voxels of the batch sample.
feats_dict (dict): Contains features from the first
stage.
rpn_results_list (List[:obj:`InstanceData`]): Detection results
of rpn head.
Returns:
dict: Contains point-wise features, including:
- keypoints (torch.Tensor): Sampled key points.
- keypoint_features (torch.Tensor): Gathered key points
features from multi input.
fusion_keypoint_features (torch.Tensor): Keypoint features
fused by point_feature_fusion_layer.
"""
points = batch_inputs_dict['points']
voxel_encode_features = feats_dict['multi_scale_3d_feats']
bev_encode_features = feats_dict['spatial_feats']
if self.voxel_center_as_source:
voxels_coors = batch_inputs_dict['voxels']['coors']
else:
voxels_coors = None
keypoints = self.sample_key_points(points, voxels_coors)
point_features_list = []
batch_size = len(points)
if self.bev_cfg is not None:
point_bev_features = self.interpolate_from_bev_features(
keypoints, bev_encode_features, batch_size,
self.bev_cfg.bev_scale_factor)
point_features_list.append(point_bev_features.contiguous())
batch_size, num_keypoints, _ = keypoints.shape
key_xyz = keypoints.view(-1, 3)
key_xyz_batch_cnt = key_xyz.new_zeros(batch_size).int().fill_(
num_keypoints)
if self.rawpoints_sa_layer is not None:
batch_points = torch.cat(points, dim=0)
batch_cnt = [len(p) for p in points]
xyz = batch_points[:, :3].contiguous()
features = None
if batch_points.size(1) > 0:
features = batch_points[:, 3:].contiguous()
xyz_batch_cnt = xyz.new_tensor(batch_cnt, dtype=torch.int32)
pooled_points, pooled_features = self.rawpoints_sa_layer(
xyz=xyz.contiguous(),
xyz_batch_cnt=xyz_batch_cnt,
new_xyz=key_xyz.contiguous(),
new_xyz_batch_cnt=key_xyz_batch_cnt,
features=features.contiguous(),
)
point_features_list.append(pooled_features.contiguous().view(
batch_size, num_keypoints, -1))
if self.voxel_sa_layers is not None:
for k, voxel_sa_layer in enumerate(self.voxel_sa_layers):
cur_coords = voxel_encode_features[k].indices
xyz = self.get_voxel_centers(
coors=cur_coords,
scale_factor=self.voxel_sa_configs_list[k].scale_factor
).contiguous()
xyz_batch_cnt = xyz.new_zeros(batch_size).int()
for bs_idx in range(batch_size):
xyz_batch_cnt[bs_idx] = (cur_coords[:, 0] == bs_idx).sum()
pooled_points, pooled_features = voxel_sa_layer(
xyz=xyz.contiguous(),
xyz_batch_cnt=xyz_batch_cnt,
new_xyz=key_xyz.contiguous(),
new_xyz_batch_cnt=key_xyz_batch_cnt,
features=voxel_encode_features[k].features.contiguous(),
)
point_features_list.append(pooled_features.contiguous().view(
batch_size, num_keypoints, -1))
point_features = torch.cat(
point_features_list, dim=-1).view(batch_size * num_keypoints, -1,
1)
fusion_point_features = self.point_feature_fusion_layer(
point_features.unsqueeze(dim=-1)).squeeze(dim=-1)
batch_idxs = torch.arange(
batch_size * num_keypoints, device=keypoints.device
) // num_keypoints # batch indexes of each key points
batch_keypoints_xyz = torch.cat(
(batch_idxs.to(key_xyz.dtype).unsqueeze(dim=-1), key_xyz), dim=-1)
return dict(
keypoint_features=point_features.squeeze(dim=-1),
fusion_keypoint_features=fusion_point_features.squeeze(dim=-1),
keypoints=batch_keypoints_xyz)
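As a standalone numeric check of the voxel-center math in get_voxel_centers above (a re-derivation, not an mmdet3d call; the voxel size and point-cloud range are illustrative KITTI-like values):

import torch

coors = torch.tensor([[0, 10, 200, 400]])        # (batch_idx, z, y, x)
voxel_size = torch.tensor([0.05, 0.05, 0.1])     # (x, y, z) cell size in meters
pc_range_min = torch.tensor([0.0, -40.0, -3.0])  # minimum (x, y, z) of the range
scale_factor = 1.0

centers = coors[:, [3, 2, 1]].float()            # reorder grid indices to (x, y, z)
centers = (centers + 0.5) * (voxel_size * scale_factor) + pc_range_min
print(centers)  # tensor([[20.0250, -29.9750, -1.9500]])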
# Copyright (c) OpenMMLab. All rights reserved.
from mmdet.models.necks.fpn import FPN
from .dla_neck import DLANeck
from .imvoxel_neck import OutdoorImVoxelNeck
from .pointnet2_fp_neck import PointNetFPNeck
......
......@@ -5,10 +5,11 @@ from .h3d_roi_head import H3DRoIHead
from .mask_heads import PointwiseSemanticHead, PrimitiveHead
from .part_aggregation_roi_head import PartAggregationROIHead
from .point_rcnn_roi_head import PointRCNNRoIHead
from .pv_rcnn_roi_head import PVRCNNRoiHead
from .roi_extractors import Single3DRoIAwareExtractor, SingleRoIExtractor
__all__ = [
'Base3DRoIHead', 'PartAggregationROIHead', 'PointwiseSemanticHead',
'Single3DRoIAwareExtractor', 'PartA2BboxHead', 'SingleRoIExtractor',
'H3DRoIHead', 'PrimitiveHead', 'PointRCNNRoIHead', 'PVRCNNRoiHead'
]
# Copyright (c) OpenMMLab. All rights reserved.
from mmdet.models.roi_heads import BaseRoIHead
from mmdet3d.registry import MODELS, TASK_UTILS
class Base3DRoIHead(BaseRoIHead):
"""Base class for 3d RoIHeads."""
......
......@@ -3,12 +3,14 @@ from mmdet.models.roi_heads.bbox_heads import (BBoxHead, ConvFCBBoxHead,
DoubleConvFCBBoxHead,
Shared2FCBBoxHead,
Shared4Conv1FCBBoxHead)
from .h3d_bbox_head import H3DBboxHead
from .parta2_bbox_head import PartA2BboxHead
from .point_rcnn_bbox_head import PointRCNNBboxHead
from .pv_rcnn_bbox_head import PVRCNNBBoxHead
__all__ = [
'BBoxHead', 'ConvFCBBoxHead', 'Shared2FCBBoxHead',
'Shared4Conv1FCBBoxHead', 'DoubleConvFCBBoxHead', 'PartA2BboxHead',
'H3DBboxHead', 'PointRCNNBboxHead', 'PVRCNNBBoxHead'
]
......@@ -3,6 +3,7 @@ from typing import Dict, List, Optional, Tuple
import torch
from mmcv.cnn import ConvModule
from mmdet.models.utils import multi_apply
from mmengine.model import BaseModule
from mmengine.structures import InstanceData
from torch import Tensor
......@@ -15,7 +16,6 @@ from mmdet3d.models.losses import chamfer_distance
from mmdet3d.registry import MODELS, TASK_UTILS
from mmdet3d.structures import (BaseInstance3DBoxes, DepthInstance3DBoxes,
Det3DDataSample)
@MODELS.register_module()
......
......@@ -4,6 +4,7 @@ from typing import Dict, List, Tuple
import numpy as np
import torch
from mmcv.cnn import ConvModule
from mmdet.models.utils import multi_apply
from mmengine.model import normal_init
from mmengine.structures import InstanceData
from torch import Tensor
......@@ -11,7 +12,6 @@ from torch import Tensor
from mmdet3d.models import make_sparse_convmodule
from mmdet3d.models.layers.spconv import IS_SPCONV2_AVAILABLE
from mmdet3d.utils.typing import InstanceList
if IS_SPCONV2_AVAILABLE:
from spconv.pytorch import (SparseConvTensor, SparseMaxPool3d,
......
......@@ -6,6 +6,7 @@ import torch
import torch.nn as nn
from mmcv.cnn import ConvModule
from mmcv.cnn.bricks import build_conv_layer
from mmdet.models.utils import multi_apply
from mmengine.model import BaseModule, normal_init
from mmengine.structures import InstanceData
from torch import Tensor
......@@ -16,7 +17,6 @@ from mmdet3d.registry import MODELS, TASK_UTILS
from mmdet3d.structures.bbox_3d import (LiDARInstance3DBoxes,
rotation_3d_in_axis, xywhr2xyxyr)
from mmdet3d.utils.typing import InstanceList, SamplingResultList
@MODELS.register_module()
......
# Copyright (c) OpenMMLab. All rights reserved.
from typing import Dict, List, Optional, Tuple
import numpy as np
import torch
from mmcv.cnn import ConvModule
from mmdet.models.task_modules.samplers import SamplingResult
from mmdet.models.utils import multi_apply
from mmengine.model import BaseModule
from mmengine.structures import InstanceData
from torch import nn as nn
from mmdet3d.models.builder import build_loss
from mmdet3d.models.layers import nms_bev, nms_normal_bev
from mmdet3d.registry import MODELS, TASK_UTILS
from mmdet3d.structures.bbox_3d import (LiDARInstance3DBoxes,
rotation_3d_in_axis, xywhr2xyxyr)
from mmdet3d.utils import InstanceList
@MODELS.register_module()
class PVRCNNBBoxHead(BaseModule):
"""PVRCNN BBox head.
Args:
in_channels (int): The number of input channels.
grid_size (int): The number of grid points in roi bbox.
num_classes (int): The number of classes.
class_agnostic (bool): Whether to generate class-agnostic predictions.
Defaults to True.
shared_fc_channels (tuple(int)): Out channels of each shared fc layer.
Defaults to (256, 256).
cls_channels (tuple(int)): Out channels of each classification layer.
Defaults to (256, 256).
reg_channels (tuple(int)): Out channels of each regression layer.
Defaults to (256, 256).
dropout_ratio (float): Dropout ratio. Defaults to 0.3.
with_corner_loss (bool): Whether to use corner loss or not.
Defaults to True.
bbox_coder (:obj:`BaseBBoxCoder`): Bbox coder for box head.
Defaults to dict(type='DeltaXYZWLHRBBoxCoder').
norm_cfg (dict): Type of normalization method.
Defaults to dict(type='BN1d', eps=1e-5, momentum=0.1)
loss_bbox (dict): Config dict of box regression loss.
loss_cls (dict): Config dict of classification loss.
init_cfg (dict, optional): Initialization config of the model.
"""
def __init__(
self,
in_channels: int,
grid_size: int,
num_classes: int,
class_agnostic: bool = True,
shared_fc_channels: Tuple[int] = (256, 256),
cls_channels: Tuple[int] = (256, 256),
reg_channels: Tuple[int] = (256, 256),
dropout_ratio: float = 0.3,
with_corner_loss: bool = True,
bbox_coder: dict = dict(type='DeltaXYZWLHRBBoxCoder'),
norm_cfg: dict = dict(type='BN2d', eps=1e-5, momentum=0.1),
loss_bbox: dict = dict(
type='mmdet.SmoothL1Loss', beta=1.0 / 9.0, loss_weight=2.0),
loss_cls: dict = dict(
type='mmdet.CrossEntropyLoss',
use_sigmoid=True,
reduction='none',
loss_weight=1.0),
init_cfg: Optional[dict] = dict(
type='Xavier', layer=['Conv2d', 'Conv1d'], distribution='uniform')
) -> None:
super(PVRCNNBBoxHead, self).__init__(init_cfg=init_cfg)
self.init_cfg = init_cfg
self.num_classes = num_classes
self.with_corner_loss = with_corner_loss
self.class_agnostic = class_agnostic
self.bbox_coder = TASK_UTILS.build(bbox_coder)
self.loss_bbox = build_loss(loss_bbox)
self.loss_cls = build_loss(loss_cls)
self.use_sigmoid_cls = loss_cls.get('use_sigmoid', False)
cls_out_channels = 1 if class_agnostic else num_classes
self.reg_out_channels = self.bbox_coder.code_size * cls_out_channels
if self.use_sigmoid_cls:
self.cls_out_channels = cls_out_channels
else:
self.cls_out_channels = cls_out_channels + 1
self.dropout_ratio = dropout_ratio
self.grid_size = grid_size
# The effective in_channels equals the per-point channels times the number of grid points in the RoI box.
in_channels *= (self.grid_size**3)
self.in_channels = in_channels
self.shared_fc_layer = self._make_fc_layers(
in_channels, shared_fc_channels,
range(len(shared_fc_channels) - 1), norm_cfg)
self.cls_layer = self._make_fc_layers(
shared_fc_channels[-1],
cls_channels,
range(1),
norm_cfg,
out_channels=self.cls_out_channels)
self.reg_layer = self._make_fc_layers(
shared_fc_channels[-1],
reg_channels,
range(1),
norm_cfg,
out_channels=self.reg_out_channels)
def _make_fc_layers(self,
in_channels: int,
fc_channels: list,
dropout_indices: list,
norm_cfg: dict,
out_channels: Optional[int] = None) -> torch.nn.Module:
"""Initial a full connection layer.
Args:
in_channels (int): Module in channels.
fc_channels (list): Full connection layer channels.
dropout_indices (list): Dropout indices.
norm_cfg (dict): Type of normalization method.
out_channels (int, optional): Module out channels.
"""
fc_layers = []
pre_channel = in_channels
for k in range(len(fc_channels)):
fc_layers.append(
ConvModule(
pre_channel,
fc_channels[k],
kernel_size=(1, 1),
stride=(1, 1),
norm_cfg=norm_cfg,
conv_cfg=dict(type='Conv2d'),
bias=False,
inplace=True))
pre_channel = fc_channels[k]
if self.dropout_ratio >= 0 and k in dropout_indices:
fc_layers.append(nn.Dropout(self.dropout_ratio))
if out_channels is not None:
fc_layers.append(
nn.Conv2d(fc_channels[-1], out_channels, 1, bias=True))
fc_layers = nn.Sequential(*fc_layers)
return fc_layers
def forward(self, feats: torch.Tensor) -> Tuple[torch.Tensor]:
"""Forward pvrcnn bbox head.
Args:
feats (torch.Tensor): Batch point-wise features.
Returns:
tuple[torch.Tensor]: Score of class and bbox predictions.
"""
# (B * N, 6, 6, 6, C)
rcnn_batch_size = feats.shape[0]
feats = feats.permute(0, 4, 1, 2,
3).contiguous().view(rcnn_batch_size, -1, 1, 1)
# (BxN, C*6*6*6)
shared_feats = self.shared_fc_layer(feats)
cls_score = self.cls_layer(shared_feats).transpose(
1, 2).contiguous().view(-1, self.cls_out_channels) # (B, 1)
bbox_pred = self.reg_layer(shared_feats).transpose(
1, 2).contiguous().view(-1, self.reg_out_channels) # (B, C)
return cls_score, bbox_pred
def loss(self, cls_score: torch.Tensor, bbox_pred: torch.Tensor,
rois: torch.Tensor, labels: torch.Tensor,
bbox_targets: torch.Tensor, pos_gt_bboxes: torch.Tensor,
reg_mask: torch.Tensor, label_weights: torch.Tensor,
bbox_weights: torch.Tensor) -> Dict:
"""Coumputing losses.
Args:
cls_score (torch.Tensor): Scores of each roi.
bbox_pred (torch.Tensor): Predictions of bboxes.
rois (torch.Tensor): Roi bboxes.
labels (torch.Tensor): Labels of class.
bbox_targets (torch.Tensor): Target of positive bboxes.
pos_gt_bboxes (torch.Tensor): Ground truths of positive bboxes.
reg_mask (torch.Tensor): Mask for positive bboxes.
label_weights (torch.Tensor): Weights of class loss.
bbox_weights (torch.Tensor): Weights of bbox loss.
Returns:
dict: Computed losses.
- loss_cls (torch.Tensor): Loss of classes.
- loss_bbox (torch.Tensor): Loss of bboxes.
- loss_corner (torch.Tensor): Loss of corners.
"""
losses = dict()
rcnn_batch_size = cls_score.shape[0]
# calculate class loss
cls_flat = cls_score.view(-1)
loss_cls = self.loss_cls(cls_flat, labels, label_weights)
losses['loss_cls'] = loss_cls
# calculate regression loss
code_size = self.bbox_coder.code_size
pos_inds = (reg_mask > 0)
if pos_inds.any() == 0:
# fake a part loss
losses['loss_bbox'] = 0 * bbox_pred.sum()
if self.with_corner_loss:
losses['loss_corner'] = 0 * bbox_pred.sum()
else:
pos_bbox_pred = bbox_pred.view(rcnn_batch_size, -1)[pos_inds]
bbox_weights_flat = bbox_weights[pos_inds].view(-1, 1).repeat(
1, pos_bbox_pred.shape[-1])
loss_bbox = self.loss_bbox(
pos_bbox_pred.unsqueeze(dim=0), bbox_targets.unsqueeze(dim=0),
bbox_weights_flat.unsqueeze(dim=0))
losses['loss_bbox'] = loss_bbox
if self.with_corner_loss:
pos_roi_boxes3d = rois[..., 1:].view(-1, code_size)[pos_inds]
pos_roi_boxes3d = pos_roi_boxes3d.view(-1, code_size)
batch_anchors = pos_roi_boxes3d.clone().detach()
pos_rois_rotation = pos_roi_boxes3d[..., 6].view(-1)
roi_xyz = pos_roi_boxes3d[..., 0:3].view(-1, 3)
batch_anchors[..., 0:3] = 0
# decode boxes
pred_boxes3d = self.bbox_coder.decode(
batch_anchors,
pos_bbox_pred.view(-1, code_size)).view(-1, code_size)
pred_boxes3d[..., 0:3] = rotation_3d_in_axis(
pred_boxes3d[..., 0:3].unsqueeze(1),
pos_rois_rotation,
axis=2).squeeze(1)
pred_boxes3d[:, 0:3] += roi_xyz
# calculate corner loss
loss_corner = self.get_corner_loss_lidar(
pred_boxes3d, pos_gt_bboxes)
losses['loss_corner'] = loss_corner.mean()
return losses
def get_targets(self,
sampling_results: SamplingResult,
rcnn_train_cfg: dict,
concat: bool = True) -> Tuple[torch.Tensor]:
"""Generate targets.
Args:
sampling_results (list[:obj:`SamplingResult`]):
Sampled results from rois.
rcnn_train_cfg (:obj:`ConfigDict`): Training config of rcnn.
concat (bool): Whether to concatenate targets between batches.
Returns:
tuple[torch.Tensor]: Targets of boxes and class prediction.
"""
pos_bboxes_list = [res.pos_bboxes for res in sampling_results]
pos_gt_bboxes_list = [res.pos_gt_bboxes for res in sampling_results]
iou_list = [res.iou for res in sampling_results]
targets = multi_apply(
self._get_target_single,
pos_bboxes_list,
pos_gt_bboxes_list,
iou_list,
cfg=rcnn_train_cfg)
(label, bbox_targets, pos_gt_bboxes, reg_mask, label_weights,
bbox_weights) = targets
if concat:
label = torch.cat(label, 0)
bbox_targets = torch.cat(bbox_targets, 0)
pos_gt_bboxes = torch.cat(pos_gt_bboxes, 0)
reg_mask = torch.cat(reg_mask, 0)
label_weights = torch.cat(label_weights, 0)
label_weights /= torch.clamp(label_weights.sum(), min=1.0)
bbox_weights = torch.cat(bbox_weights, 0)
bbox_weights /= torch.clamp(bbox_weights.sum(), min=1.0)
return (label, bbox_targets, pos_gt_bboxes, reg_mask, label_weights,
bbox_weights)
def _get_target_single(self, pos_bboxes: torch.Tensor,
pos_gt_bboxes: torch.Tensor, ious: torch.Tensor,
cfg: dict) -> Tuple[torch.Tensor]:
"""Generate training targets for a single sample.
Args:
pos_bboxes (torch.Tensor): Positive boxes with shape
(N, 7).
pos_gt_bboxes (torch.Tensor): Ground truth boxes with shape
(M, 7).
ious (torch.Tensor): IoU between `pos_bboxes` and `pos_gt_bboxes`
in shape (N, M).
cfg (dict): Training configs.
Returns:
tuple[torch.Tensor]: Target for positive boxes.
(label, bbox_targets, pos_gt_bboxes, reg_mask, label_weights,
bbox_weights)
"""
cls_pos_mask = ious > cfg.cls_pos_thr
cls_neg_mask = ious < cfg.cls_neg_thr
interval_mask = (cls_pos_mask == 0) & (cls_neg_mask == 0)
# iou regression target
label = (cls_pos_mask > 0).float()
label[interval_mask] = ious[interval_mask] * 2 - 0.5
# label weights
label_weights = (label >= 0).float()
# box regression target
reg_mask = pos_bboxes.new_zeros(ious.size(0)).long()
reg_mask[0:pos_gt_bboxes.size(0)] = 1
bbox_weights = (reg_mask > 0).float()
if reg_mask.bool().any():
pos_gt_bboxes_ct = pos_gt_bboxes.clone().detach()
roi_center = pos_bboxes[..., 0:3]
roi_ry = pos_bboxes[..., 6] % (2 * np.pi)
# canonical transformation
pos_gt_bboxes_ct[..., 0:3] -= roi_center
pos_gt_bboxes_ct[..., 6] -= roi_ry
pos_gt_bboxes_ct[..., 0:3] = rotation_3d_in_axis(
pos_gt_bboxes_ct[..., 0:3].unsqueeze(1), -roi_ry,
axis=2).squeeze(1)
# flip orientation if rois have opposite orientation
ry_label = pos_gt_bboxes_ct[..., 6] % (2 * np.pi) # 0 ~ 2pi
opposite_flag = (ry_label > np.pi * 0.5) & (ry_label < np.pi * 1.5)
ry_label[opposite_flag] = (ry_label[opposite_flag] + np.pi) % (
2 * np.pi) # (0 ~ pi/2, 3pi/2 ~ 2pi)
flag = ry_label > np.pi
ry_label[flag] = ry_label[flag] - np.pi * 2 # (-pi/2, pi/2)
ry_label = torch.clamp(ry_label, min=-np.pi / 2, max=np.pi / 2)
pos_gt_bboxes_ct[..., 6] = ry_label
rois_anchor = pos_bboxes.clone().detach()
rois_anchor[:, 0:3] = 0
rois_anchor[:, 6] = 0
bbox_targets = self.bbox_coder.encode(rois_anchor,
pos_gt_bboxes_ct)
else:
# no fg bbox
bbox_targets = pos_gt_bboxes.new_empty((0, 7))
return (label, bbox_targets, pos_gt_bboxes, reg_mask, label_weights,
bbox_weights)
def get_corner_loss_lidar(self,
pred_bbox3d: torch.Tensor,
gt_bbox3d: torch.Tensor,
delta: float = 1.0) -> torch.Tensor:
"""Calculate corner loss of given boxes.
Args:
pred_bbox3d (torch.FloatTensor): Predicted boxes in shape (N, 7).
gt_bbox3d (torch.FloatTensor): Ground truth boxes in shape (N, 7).
delta (float, optional): Huber loss threshold. Defaults to 1.0.
Returns:
torch.FloatTensor: Calculated corner loss in shape (N).
"""
assert pred_bbox3d.shape[0] == gt_bbox3d.shape[0]
# This is a little bit hacky because we assume the boxes are in
# LiDAR coordinates (the same assumption as in Part-A2)
gt_boxes_structure = LiDARInstance3DBoxes(gt_bbox3d)
pred_box_corners = LiDARInstance3DBoxes(pred_bbox3d).corners
gt_box_corners = gt_boxes_structure.corners
# This flip only changes the heading direction of GT boxes
gt_bbox3d_flip = gt_boxes_structure.clone()
gt_bbox3d_flip.tensor[:, 6] += np.pi
gt_box_corners_flip = gt_bbox3d_flip.corners
corner_dist = torch.min(
torch.norm(pred_box_corners - gt_box_corners, dim=2),
torch.norm(pred_box_corners - gt_box_corners_flip,
dim=2)) # (N, 8)
# huber loss
abs_error = torch.abs(corner_dist)
corner_loss = torch.where(abs_error < delta,
0.5 * abs_error**2 / delta,
abs_error - 0.5 * delta)
return corner_loss.mean(dim=1)
def get_results(self,
rois: torch.Tensor,
cls_preds: torch.Tensor,
bbox_reg: torch.Tensor,
class_labels: torch.Tensor,
input_metas: List[dict],
test_cfg: dict = None) -> InstanceList:
"""Generate bboxes from bbox head predictions.
Args:
rois (torch.Tensor): Roi bounding boxes.
cls_preds (torch.Tensor): Scores of bounding boxes.
bbox_reg (torch.Tensor): Bounding box predictions.
class_labels (torch.Tensor): Class labels.
input_metas (list[dict]): Point cloud meta info.
test_cfg (:obj:`ConfigDict`): Testing config.
Returns:
list[:obj:`InstanceData`]: Detection results of each sample
after post-processing. Each item usually contains the
following keys.
- scores_3d (Tensor): Classification scores, has a shape
(num_instances, )
- labels_3d (Tensor): Labels of bboxes, has a shape
(num_instances, ).
- bboxes_3d (BaseInstance3DBoxes): Prediction of bboxes,
contains a tensor with shape (num_instances, C), where
C >= 7.
"""
roi_batch_id = rois[..., 0]
roi_boxes = rois[..., 1:] # boxes without batch id
batch_size = int(roi_batch_id.max().item() + 1)
# decode boxes
roi_ry = roi_boxes[..., 6].view(-1)
roi_xyz = roi_boxes[..., 0:3].view(-1, 3)
local_roi_boxes = roi_boxes.clone().detach()
local_roi_boxes[..., 0:3] = 0
batch_box_preds = self.bbox_coder.decode(local_roi_boxes, bbox_reg)
batch_box_preds[..., 0:3] = rotation_3d_in_axis(
batch_box_preds[..., 0:3].unsqueeze(1), roi_ry, axis=2).squeeze(1)
batch_box_preds[:, 0:3] += roi_xyz
# post processing
result_list = []
for batch_id in range(batch_size):
cur_cls_preds = cls_preds[roi_batch_id == batch_id]
box_preds = batch_box_preds[roi_batch_id == batch_id]
label_preds = class_labels[batch_id]
cur_cls_preds = cur_cls_preds.sigmoid()
cur_cls_preds, _ = torch.max(cur_cls_preds, dim=-1)
selected = self.class_agnostic_nms(
scores=cur_cls_preds,
bbox_preds=box_preds,
input_meta=input_metas[batch_id],
nms_cfg=test_cfg)
selected_bboxes = box_preds[selected]
selected_label_preds = label_preds[selected]
selected_scores = cur_cls_preds[selected]
results = InstanceData()
results.bboxes_3d = input_metas[batch_id]['box_type_3d'](
selected_bboxes, self.bbox_coder.code_size)
results.scores_3d = selected_scores
results.labels_3d = selected_label_preds
result_list.append(results)
return result_list
def class_agnostic_nms(self, scores: torch.Tensor,
bbox_preds: torch.Tensor, nms_cfg: dict,
input_meta: dict) -> Tuple[torch.Tensor]:
"""Class agnostic NMS for box head.
Args:
scores (torch.Tensor): Object score of bounding boxes.
bbox_preds (torch.Tensor): Predicted bounding boxes.
nms_cfg (dict): NMS config dict.
input_meta (dict): Contains point cloud and image meta info.
Returns:
tuple[torch.Tensor]: Bounding boxes, scores and labels.
"""
obj_scores = scores.clone()
if nms_cfg.use_rotate_nms:
nms_func = nms_bev
else:
nms_func = nms_normal_bev
bbox = input_meta['box_type_3d'](
bbox_preds.clone(),
box_dim=bbox_preds.shape[-1],
with_yaw=True,
origin=(0.5, 0.5, 0.5))
if nms_cfg.score_thr is not None:
scores_mask = (obj_scores >= nms_cfg.score_thr)
obj_scores = obj_scores[scores_mask]
bbox = bbox[scores_mask]
selected = []
if obj_scores.shape[0] > 0:
box_scores_nms, indices = torch.topk(
obj_scores, k=min(4096, obj_scores.shape[0]))
bbox_bev = bbox.bev[indices]
bbox_for_nms = xywhr2xyxyr(bbox_bev)
keep = nms_func(bbox_for_nms, box_scores_nms, nms_cfg.nms_thr)
selected = indices[keep]
if nms_cfg.score_thr is not None:
original_idxs = scores_mask.nonzero().view(-1)
selected = original_idxs[selected]
return selected
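A small numeric check of the Huber penalty used in get_corner_loss_lidar above: the loss is quadratic below delta and linear beyond it (the corner distances are illustrative).

import torch

delta = 1.0
corner_dist = torch.tensor([0.2, 0.8, 1.0, 3.0])
abs_error = torch.abs(corner_dist)
corner_loss = torch.where(abs_error < delta,
                          0.5 * abs_error**2 / delta,
                          abs_error - 0.5 * delta)
print(corner_loss)  # tensor([0.0200, 0.3200, 0.5000, 2.5000])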
# Copyright (c) OpenMMLab. All rights reserved.
from .foreground_segmentation_head import ForegroundSegmentationHead
from .pointwise_semantic_head import PointwiseSemanticHead
from .primitive_head import PrimitiveHead
__all__ = [
'PointwiseSemanticHead', 'PrimitiveHead', 'ForegroundSegmentationHead'
]
# Copyright (c) OpenMMLab. All rights reserved.
from typing import Dict, Optional, Tuple
import torch
from mmcv.cnn.bricks import build_norm_layer
from mmdet.models.utils import multi_apply
from mmengine.model import BaseModule
from mmengine.structures import InstanceData
from torch import nn as nn
from mmdet3d.models.builder import build_loss
from mmdet3d.registry import MODELS
from mmdet3d.utils import InstanceList
@MODELS.register_module()
class ForegroundSegmentationHead(BaseModule):
"""Foreground segmentation head.
Args:
in_channels (int): The number of input channels.
mlp_channels (tuple[int]): MLP channels. Defaults to (256, 256).
extra_width (float): Box enlargement width. Defaults to 0.1.
norm_cfg (dict): Type of normalization method. Defaults to
dict(type='BN1d', eps=1e-5, momentum=0.1).
init_cfg (dict, optional): Initialization config of the model.
Defaults to None.
loss_seg (dict): Config of segmentation loss. Defaults to
dict(type='mmdet.FocalLoss')
"""
def __init__(
self,
in_channels: int,
mlp_channels: Tuple[int] = (256, 256),
extra_width: float = 0.1,
norm_cfg: dict = dict(type='BN1d', eps=1e-5, momentum=0.1),
init_cfg: Optional[dict] = None,
loss_seg: dict = dict(
type='mmdet.FocalLoss',
use_sigmoid=True,
reduction='sum',
gamma=2.0,
alpha=0.25,
activated=True,
loss_weight=1.0)
) -> None:
super(ForegroundSegmentationHead, self).__init__(init_cfg=init_cfg)
self.extra_width = extra_width
self.num_classes = 1
self.in_channels = in_channels
self.use_sigmoid_cls = loss_seg.get('use_sigmoid', False)
out_channels = 1
if self.use_sigmoid_cls:
self.out_channels = out_channels
else:
self.out_channels = out_channels + 1
mlps_layers = []
cin = in_channels
for mlp in mlp_channels:
mlps_layers.extend([
nn.Linear(cin, mlp, bias=False),
build_norm_layer(norm_cfg, mlp)[1],
nn.ReLU()
])
cin = mlp
mlps_layers.append(nn.Linear(cin, self.out_channels, bias=True))
self.seg_cls_layer = nn.Sequential(*mlps_layers)
self.loss_seg = build_loss(loss_seg)
def forward(self, feats: torch.Tensor) -> dict:
"""Forward head.
Args:
feats (torch.Tensor): Point-wise features.
Returns:
dict: Segment predictions.
"""
seg_preds = self.seg_cls_layer(feats)
return dict(seg_preds=seg_preds)
def _get_targets_single(self, point_xyz: torch.Tensor,
gt_bboxes_3d: InstanceData,
gt_labels_3d: torch.Tensor) -> torch.Tensor:
"""generate segmentation targets for a single sample.
Args:
point_xyz (torch.Tensor): Coordinate of points.
gt_bboxes_3d (:obj:`BaseInstance3DBoxes`): Ground truth boxes in
shape (box_num, 7).
gt_labels_3d (torch.Tensor): Class labels of ground truths in
shape (box_num).
Returns:
torch.Tensor: Points class labels.
"""
point_cls_labels_single = point_xyz.new_zeros(
point_xyz.shape[0]).long()
enlarged_gt_boxes = gt_bboxes_3d.enlarged_box(self.extra_width)
box_idxs_of_pts = gt_bboxes_3d.points_in_boxes_part(point_xyz).long()
extend_box_idxs_of_pts = enlarged_gt_boxes.points_in_boxes_part(
point_xyz).long()
box_fg_flag = box_idxs_of_pts >= 0
fg_flag = box_fg_flag.clone()
ignore_flag = fg_flag ^ (extend_box_idxs_of_pts >= 0)
point_cls_labels_single[ignore_flag] = -1
gt_box_of_fg_points = gt_labels_3d[box_idxs_of_pts[fg_flag]]
point_cls_labels_single[
fg_flag] = 1 if self.num_classes == 1 else\
gt_box_of_fg_points.long()
return point_cls_labels_single,
def get_targets(self, points_bxyz: torch.Tensor,
batch_gt_instances_3d: InstanceList) -> dict:
"""Generate segmentation targets.
Args:
points_bxyz (torch.Tensor): Points with batch indices, in shape
(num_points, 1 + 3).
batch_gt_instances_3d (list[:obj:`InstanceData`]): Batch of
gt_instances. It usually includes ``bboxes_3d`` and
``labels_3d`` attributes.
Returns:
dict: Prediction targets
- seg_targets (torch.Tensor): Segmentation targets.
"""
batch_size = len(batch_gt_instances_3d)
points_xyz_list = []
gt_bboxes_3d = []
gt_labels_3d = []
for idx in range(batch_size):
coords_idx = points_bxyz[:, 0] == idx
points_xyz_list.append(points_bxyz[coords_idx][..., 1:])
gt_bboxes_3d.append(batch_gt_instances_3d[idx].bboxes_3d)
gt_labels_3d.append(batch_gt_instances_3d[idx].labels_3d)
seg_targets, = multi_apply(self._get_targets_single, points_xyz_list,
gt_bboxes_3d, gt_labels_3d)
seg_targets = torch.cat(seg_targets, dim=0)
return dict(seg_targets=seg_targets)
def loss(self, semantic_results: dict,
semantic_targets: dict) -> Dict[str, torch.Tensor]:
"""Calculate point-wise segmentation losses.
Args:
semantic_results (dict): Results from semantic head.
semantic_targets (dict): Targets of semantic results.
Returns:
dict: Loss of segmentation.
- loss_semantic (torch.Tensor): Segmentation prediction loss.
"""
seg_preds = semantic_results['seg_preds']
seg_targets = semantic_targets['seg_targets']
positives = (seg_targets > 0)
negative_cls_weights = (seg_targets == 0).float()
seg_weights = (negative_cls_weights + 1.0 * positives).float()
pos_normalizer = positives.sum(dim=0).float()
seg_weights /= torch.clamp(pos_normalizer, min=1.0)
seg_preds = torch.sigmoid(seg_preds)
loss_seg = self.loss_seg(seg_preds, (~positives).long(), seg_weights)
return dict(loss_semantic=loss_seg)
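The per-point weighting in ForegroundSegmentationHead.loss above gives every non-ignored point a unit weight and then normalizes by the number of positives. A standalone sketch with made-up targets:

import torch

seg_targets = torch.tensor([1, 0, 0, 1, -1])   # 1 = fg, 0 = bg, -1 = ignored
positives = seg_targets > 0
negative_cls_weights = (seg_targets == 0).float()
seg_weights = (negative_cls_weights + 1.0 * positives).float()
pos_normalizer = positives.sum(dim=0).float()
seg_weights /= torch.clamp(pos_normalizer, min=1.0)
print(seg_weights)  # tensor([0.5000, 0.5000, 0.5000, 0.5000, 0.0000])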
......@@ -2,6 +2,7 @@
from typing import Dict, Optional, Tuple
import torch
from mmdet.models.utils import multi_apply
from mmengine.model import BaseModule
from torch import Tensor
from torch import nn as nn
......@@ -10,7 +11,6 @@ from torch.nn import functional as F
from mmdet3d.registry import MODELS
from mmdet3d.structures.bbox_3d import BaseInstance3DBoxes, rotation_3d_in_axis
from mmdet3d.utils import InstanceList
@MODELS.register_module()
......