Unverified Commit 32a4328b authored by Wenwei Zhang, committed by GitHub

Bump version to V1.0.0rc0

parents 86cc487c a8817998
@@ -2,10 +2,13 @@
 from mmdet.models.losses import FocalLoss, SmoothL1Loss, binary_cross_entropy
 from .axis_aligned_iou_loss import AxisAlignedIoULoss, axis_aligned_iou_loss
 from .chamfer_distance import ChamferDistance, chamfer_distance
+from .multibin_loss import MultiBinLoss
 from .paconv_regularization_loss import PAConvRegularizationLoss
+from .uncertain_smooth_l1_loss import UncertainL1Loss, UncertainSmoothL1Loss

 __all__ = [
     'FocalLoss', 'SmoothL1Loss', 'binary_cross_entropy', 'ChamferDistance',
     'chamfer_distance', 'axis_aligned_iou_loss', 'AxisAlignedIoULoss',
-    'PAConvRegularizationLoss'
+    'PAConvRegularizationLoss', 'UncertainL1Loss', 'UncertainSmoothL1Loss',
+    'MultiBinLoss'
 ]
@@ -54,7 +54,7 @@ class AxisAlignedIoULoss(nn.Module):
         Args:
             pred (torch.Tensor): Bbox predictions with shape [..., 3].
             target (torch.Tensor): Bbox targets (gt) with shape [..., 3].
-            weight (torch.Tensor|float, optional): Weight of loss. \
+            weight (torch.Tensor | float, optional): Weight of loss.
                 Defaults to None.
             avg_factor (int, optional): Average factor that is used to average
                 the loss. Defaults to None.
...
@@ -29,13 +29,13 @@ def chamfer_distance(src,
     Returns:
         tuple: Source and Destination loss with the corresponding indices.
-            - loss_src (torch.Tensor): The min distance \
+            - loss_src (torch.Tensor): The min distance
                 from source to destination.
-            - loss_dst (torch.Tensor): The min distance \
+            - loss_dst (torch.Tensor): The min distance
                 from destination to source.
-            - indices1 (torch.Tensor): Index the min distance point \
+            - indices1 (torch.Tensor): Index the min distance point
                 for each point in source to destination.
-            - indices2 (torch.Tensor): Index the min distance point \
+            - indices2 (torch.Tensor): Index the min distance point
                 for each point in destination to source.
     """
@@ -125,10 +125,10 @@ class ChamferDistance(nn.Module):
                 Defaults to False.

         Returns:
-            tuple[torch.Tensor]: If ``return_indices=True``, return losses of \
-                source and target with their corresponding indices in the \
-                order of ``(loss_source, loss_target, indices1, indices2)``. \
-                If ``return_indices=False``, return \
+            tuple[torch.Tensor]: If ``return_indices=True``, return losses of
+                source and target with their corresponding indices in the
+                order of ``(loss_source, loss_target, indices1, indices2)``.
+                If ``return_indices=False``, return
                 ``(loss_source, loss_target)``.
         """
         assert reduction_override in (None, 'none', 'mean', 'sum')
...
# Copyright (c) OpenMMLab. All rights reserved.
import torch
from torch import nn as nn
from torch.nn import functional as F
from mmdet.models.builder import LOSSES
from mmdet.models.losses.utils import weighted_loss
@weighted_loss
def multibin_loss(pred_orientations, gt_orientations, num_dir_bins=4):
"""Multi-Bin Loss.
    Args:
        pred_orientations (torch.Tensor): Predicted local vector
            orientation in [axis_cls, head_cls, sin, cos] format,
            shape (N, num_dir_bins * 4).
        gt_orientations (torch.Tensor): Corresponding gt bboxes,
            shape (N, num_dir_bins * 2).
        num_dir_bins (int, optional): Number of bins to encode
            direction angle. Defaults to 4.

    Returns:
torch.Tensor: Loss tensor.
"""
cls_losses = 0
reg_losses = 0
reg_cnt = 0
for i in range(num_dir_bins):
# bin cls loss
cls_ce_loss = F.cross_entropy(
pred_orientations[:, (i * 2):(i * 2 + 2)],
gt_orientations[:, i].long(),
reduction='mean')
# regression loss
valid_mask_i = (gt_orientations[:, i] == 1)
cls_losses += cls_ce_loss
if valid_mask_i.sum() > 0:
start = num_dir_bins * 2 + i * 2
end = start + 2
pred_offset = F.normalize(pred_orientations[valid_mask_i,
start:end])
gt_offset_sin = torch.sin(gt_orientations[valid_mask_i,
num_dir_bins + i])
gt_offset_cos = torch.cos(gt_orientations[valid_mask_i,
num_dir_bins + i])
reg_loss = \
F.l1_loss(pred_offset[:, 0], gt_offset_sin,
reduction='none') + \
F.l1_loss(pred_offset[:, 1], gt_offset_cos,
reduction='none')
reg_losses += reg_loss.sum()
reg_cnt += valid_mask_i.sum()
return cls_losses / num_dir_bins + reg_losses / reg_cnt
@LOSSES.register_module()
class MultiBinLoss(nn.Module):
"""Multi-Bin Loss for orientation.
Args:
reduction (str, optional): The method to reduce the loss.
Options are 'none', 'mean' and 'sum'. Defaults to 'none'.
loss_weight (float, optional): The weight of loss. Defaults
to 1.0.
"""
def __init__(self, reduction='none', loss_weight=1.0):
super(MultiBinLoss, self).__init__()
assert reduction in ['none', 'sum', 'mean']
self.reduction = reduction
self.loss_weight = loss_weight
def forward(self, pred, target, num_dir_bins, reduction_override=None):
"""Forward function.
Args:
pred (torch.Tensor): The prediction.
target (torch.Tensor): The learning target of the prediction.
num_dir_bins (int): Number of bins to encode direction angle.
reduction_override (str, optional): The reduction method used to
override the original reduction method of the loss.
Defaults to None.
"""
assert reduction_override in (None, 'none', 'mean', 'sum')
reduction = (
reduction_override if reduction_override else self.reduction)
loss = self.loss_weight * multibin_loss(
pred, target, num_dir_bins=num_dir_bins, reduction=reduction)
return loss
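A minimal smoke test for the new loss, sketched under the assumption that mmdet3d is installed and exports MultiBinLoss as in the __init__.py change above; all tensor contents below are illustrative only:

import torch

from mmdet3d.models.losses import MultiBinLoss

num_bins = 4
loss_fn = MultiBinLoss(reduction='none', loss_weight=1.0)
# pred layout: [bin cls logits (2 per bin) | sin/cos offsets (2 per bin)]
pred = torch.randn(8, num_bins * 4)
# gt layout: [bin indicators (0/1) | gt angles]; mark bin 0 valid so that
# reg_cnt > 0 and the regression branch is exercised
gt = torch.zeros(8, num_bins * 2)
gt[:, 0] = 1
gt[:, num_bins] = 0.3
loss = loss_fn(pred, gt, num_dir_bins=num_bins)  # scalar loss tensor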
# Copyright (c) OpenMMLab. All rights reserved.
import torch
from torch import nn as nn
from mmdet.models.builder import LOSSES
from mmdet.models.losses.utils import weighted_loss
@weighted_loss
def uncertain_smooth_l1_loss(pred, target, sigma, alpha=1.0, beta=1.0):
"""Smooth L1 loss with uncertainty.
Args:
pred (torch.Tensor): The prediction.
target (torch.Tensor): The learning target of the prediction.
sigma (torch.Tensor): The sigma for uncertainty.
alpha (float, optional): The coefficient of log(sigma).
Defaults to 1.0.
beta (float, optional): The threshold in the piecewise function.
Defaults to 1.0.
Returns:
torch.Tensor: Calculated loss
"""
assert beta > 0
assert target.numel() > 0
assert pred.size() == target.size() == sigma.size(), 'The size of pred ' \
f'{pred.size()}, target {target.size()}, and sigma {sigma.size()} ' \
'are inconsistent.'
diff = torch.abs(pred - target)
loss = torch.where(diff < beta, 0.5 * diff * diff / beta,
diff - 0.5 * beta)
loss = torch.exp(-sigma) * loss + alpha * sigma
return loss
@weighted_loss
def uncertain_l1_loss(pred, target, sigma, alpha=1.0):
"""L1 loss with uncertainty.
Args:
pred (torch.Tensor): The prediction.
target (torch.Tensor): The learning target of the prediction.
sigma (torch.Tensor): The sigma for uncertainty.
alpha (float, optional): The coefficient of log(sigma).
Defaults to 1.0.
Returns:
torch.Tensor: Calculated loss
"""
assert target.numel() > 0
assert pred.size() == target.size() == sigma.size(), 'The size of pred ' \
f'{pred.size()}, target {target.size()}, and sigma {sigma.size()} ' \
'are inconsistent.'
loss = torch.abs(pred - target)
loss = torch.exp(-sigma) * loss + alpha * sigma
return loss
@LOSSES.register_module()
class UncertainSmoothL1Loss(nn.Module):
r"""Smooth L1 loss with uncertainty.
Please refer to `PGD <https://arxiv.org/abs/2107.14160>`_ and
`Multi-Task Learning Using Uncertainty to Weigh Losses for Scene Geometry
and Semantics <https://arxiv.org/abs/1705.07115>`_ for more details.
Args:
alpha (float, optional): The coefficient of log(sigma).
Defaults to 1.0.
beta (float, optional): The threshold in the piecewise function.
Defaults to 1.0.
reduction (str, optional): The method to reduce the loss.
Options are 'none', 'mean' and 'sum'. Defaults to 'mean'.
        loss_weight (float, optional): The weight of loss. Defaults to 1.0.
"""
def __init__(self, alpha=1.0, beta=1.0, reduction='mean', loss_weight=1.0):
super(UncertainSmoothL1Loss, self).__init__()
assert reduction in ['none', 'sum', 'mean']
self.alpha = alpha
self.beta = beta
self.reduction = reduction
self.loss_weight = loss_weight
def forward(self,
pred,
target,
sigma,
weight=None,
avg_factor=None,
reduction_override=None,
**kwargs):
"""Forward function.
Args:
pred (torch.Tensor): The prediction.
target (torch.Tensor): The learning target of the prediction.
sigma (torch.Tensor): The sigma for uncertainty.
weight (torch.Tensor, optional): The weight of loss for each
prediction. Defaults to None.
avg_factor (int, optional): Average factor that is used to average
the loss. Defaults to None.
reduction_override (str, optional): The reduction method used to
override the original reduction method of the loss.
Defaults to None.
"""
assert reduction_override in (None, 'none', 'mean', 'sum')
reduction = (
reduction_override if reduction_override else self.reduction)
loss_bbox = self.loss_weight * uncertain_smooth_l1_loss(
pred,
target,
weight,
sigma=sigma,
alpha=self.alpha,
beta=self.beta,
reduction=reduction,
avg_factor=avg_factor,
**kwargs)
return loss_bbox
@LOSSES.register_module()
class UncertainL1Loss(nn.Module):
"""L1 loss with uncertainty.
Args:
alpha (float, optional): The coefficient of log(sigma).
Defaults to 1.0.
reduction (str, optional): The method to reduce the loss.
Options are 'none', 'mean' and 'sum'. Defaults to 'mean'.
loss_weight (float, optional): The weight of loss. Defaults to 1.0.
"""
def __init__(self, alpha=1.0, reduction='mean', loss_weight=1.0):
super(UncertainL1Loss, self).__init__()
assert reduction in ['none', 'sum', 'mean']
self.alpha = alpha
self.reduction = reduction
self.loss_weight = loss_weight
def forward(self,
pred,
target,
sigma,
weight=None,
avg_factor=None,
reduction_override=None):
"""Forward function.
Args:
pred (torch.Tensor): The prediction.
target (torch.Tensor): The learning target of the prediction.
sigma (torch.Tensor): The sigma for uncertainty.
weight (torch.Tensor, optional): The weight of loss for each
prediction. Defaults to None.
avg_factor (int, optional): Average factor that is used to average
the loss. Defaults to None.
reduction_override (str, optional): The reduction method used to
override the original reduction method of the loss.
Defaults to None.
"""
assert reduction_override in (None, 'none', 'mean', 'sum')
reduction = (
reduction_override if reduction_override else self.reduction)
loss_bbox = self.loss_weight * uncertain_l1_loss(
pred,
target,
weight,
sigma=sigma,
alpha=self.alpha,
reduction=reduction,
avg_factor=avg_factor)
return loss_bbox
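A hedged usage sketch for both uncertainty-aware losses (assumes mmdet3d is installed; with sigma fixed at zero they reduce to plain smooth-L1/L1 terms):

import torch

from mmdet3d.models.losses import UncertainL1Loss, UncertainSmoothL1Loss

pred = torch.randn(4, 7)
target = torch.randn(4, 7)
sigma = torch.zeros(4, 7)  # predicted log-variance; zeros disable weighting

smooth_l1 = UncertainSmoothL1Loss(alpha=1.0, beta=1.0, reduction='mean')
l1 = UncertainL1Loss(alpha=1.0, reduction='mean')
print(smooth_l1(pred, target, sigma), l1(pred, target, sigma))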
@@ -14,19 +14,21 @@ class SparseEncoder(nn.Module):
     Args:
         in_channels (int): The number of input channels.
         sparse_shape (list[int]): The sparse shape of input tensor.
-        order (list[str]): Order of conv module. Defaults to ('conv',
-            'norm', 'act').
-        norm_cfg (dict): Config of normalization layer. Defaults to
+        order (list[str], optional): Order of conv module.
+            Defaults to ('conv', 'norm', 'act').
+        norm_cfg (dict, optional): Config of normalization layer. Defaults to
             dict(type='BN1d', eps=1e-3, momentum=0.01).
-        base_channels (int): Out channels for conv_input layer.
+        base_channels (int, optional): Out channels for conv_input layer.
             Defaults to 16.
-        output_channels (int): Out channels for conv_out layer.
+        output_channels (int, optional): Out channels for conv_out layer.
             Defaults to 128.
-        encoder_channels (tuple[tuple[int]]):
+        encoder_channels (tuple[tuple[int]], optional):
             Convolutional channels of each encode block.
-        encoder_paddings (tuple[tuple[int]]): Paddings of each encode block.
+        encoder_paddings (tuple[tuple[int]], optional):
+            Paddings of each encode block.
             Defaults to ((16, ), (32, 32, 32), (64, 64, 64), (64, 64, 64)).
-        block_type (str): Type of the block to use. Defaults to 'conv_module'.
+        block_type (str, optional): Type of the block to use.
+            Defaults to 'conv_module'.
     """

     def __init__(self,
@@ -99,7 +101,7 @@ class SparseEncoder(nn.Module):
         Args:
             voxel_features (torch.float32): Voxel features in shape (N, C).
-            coors (torch.int32): Coordinates in shape (N, 4), \
+            coors (torch.int32): Coordinates in shape (N, 4),
                 the columns in the order of (batch_idx, z_idx, y_idx, x_idx).
             batch_size (int): Batch size.

@@ -139,9 +141,9 @@ class SparseEncoder(nn.Module):
             make_block (method): A bounded function to build blocks.
             norm_cfg (dict[str]): Config of normalization layer.
             in_channels (int): The number of encoder input channels.
-            block_type (str): Type of the block to use. Defaults to
-                'conv_module'.
-            conv_cfg (dict): Config of conv layer. Defaults to
+            block_type (str, optional): Type of the block to use.
+                Defaults to 'conv_module'.
+            conv_cfg (dict, optional): Config of conv layer. Defaults to
                 dict(type='SubMConv3d').

         Returns:
...
 # Copyright (c) OpenMMLab. All rights reserved.
+from .edge_fusion_module import EdgeFusionModule
 from .transformer import GroupFree3DMHA
 from .vote_module import VoteModule

-__all__ = ['VoteModule', 'GroupFree3DMHA']
+__all__ = ['VoteModule', 'GroupFree3DMHA', 'EdgeFusionModule']
# Copyright (c) OpenMMLab. All rights reserved.
from mmcv.cnn import ConvModule
from mmcv.runner import BaseModule
from torch import nn as nn
from torch.nn import functional as F
class EdgeFusionModule(BaseModule):
"""Edge Fusion Module for feature map.
Args:
out_channels (int): The number of output channels.
feat_channels (int): The number of channels in feature map
during edge feature fusion.
kernel_size (int, optional): Kernel size of convolution.
Default: 3.
act_cfg (dict, optional): Config of activation.
Default: dict(type='ReLU').
        norm_cfg (dict, optional): Config of normalization.
            Default: dict(type='BN1d').
"""
def __init__(self,
out_channels,
feat_channels,
kernel_size=3,
act_cfg=dict(type='ReLU'),
norm_cfg=dict(type='BN1d')):
super().__init__()
self.edge_convs = nn.Sequential(
ConvModule(
feat_channels,
feat_channels,
kernel_size=kernel_size,
padding=kernel_size // 2,
conv_cfg=dict(type='Conv1d'),
norm_cfg=norm_cfg,
act_cfg=act_cfg),
nn.Conv1d(feat_channels, out_channels, kernel_size=1))
self.feat_channels = feat_channels
def forward(self, features, fused_features, edge_indices, edge_lens,
output_h, output_w):
"""Forward pass.
Args:
features (torch.Tensor): Different representative features
for fusion.
fused_features (torch.Tensor): Different representative
features to be fused.
edge_indices (torch.Tensor): Batch image edge indices.
edge_lens (list[int]): List of edge length of each image.
output_h (int): Height of output feature map.
output_w (int): Width of output feature map.
Returns:
torch.Tensor: Fused feature maps.
"""
batch_size = features.shape[0]
# normalize
grid_edge_indices = edge_indices.view(batch_size, -1, 1, 2).float()
grid_edge_indices[..., 0] = \
grid_edge_indices[..., 0] / (output_w - 1) * 2 - 1
grid_edge_indices[..., 1] = \
grid_edge_indices[..., 1] / (output_h - 1) * 2 - 1
# apply edge fusion
edge_features = F.grid_sample(
features, grid_edge_indices, align_corners=True).squeeze(-1)
edge_output = self.edge_convs(edge_features)
for k in range(batch_size):
edge_indice_k = edge_indices[k, :edge_lens[k]]
fused_features[k, :, edge_indice_k[:, 1],
edge_indice_k[:, 0]] += edge_output[
k, :, :edge_lens[k]]
return fused_features
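An illustrative shape check for the new module (sizes are hypothetical; assumes EdgeFusionModule is importable from mmdet3d.models.model_utils and that edge_indices holds (x, y) pixel coordinates inside the output map):

import torch

from mmdet3d.models.model_utils import EdgeFusionModule

B, C, H, W = 2, 64, 96, 312
module = EdgeFusionModule(out_channels=C, feat_channels=C)
features = torch.randn(B, C, H, W)  # map the edge features are sampled from
fused = torch.randn(B, C, H, W)     # map the edge outputs are added into
edge_indices = torch.randint(0, min(H, W), (B, 500, 2))  # padded (x, y)
edge_lens = [400, 500]              # number of valid edge points per image
out = module(features, fused, edge_indices, edge_lens, H, W)
print(out.shape)                    # torch.Size([2, 64, 96, 312])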
@@ -15,15 +15,16 @@ class GroupFree3DMHA(MultiheadAttention):
         embed_dims (int): The embedding dimension.
         num_heads (int): Parallel attention heads. Same as
             `nn.MultiheadAttention`.
-        attn_drop (float): A Dropout layer on attn_output_weights. Default 0.0.
-        proj_drop (float): A Dropout layer. Default 0.0.
-        dropout_layer (obj:`ConfigDict`): The dropout_layer used
+        attn_drop (float, optional): A Dropout layer on attn_output_weights.
+            Defaults to 0.0.
+        proj_drop (float, optional): A Dropout layer. Defaults to 0.0.
+        dropout_layer (obj:`ConfigDict`, optional): The dropout_layer used
             when adding the shortcut.
-        init_cfg (obj:`mmcv.ConfigDict`): The Config for initialization.
-            Default: None.
-        batch_first (bool): Key, Query and Value are shape of
+        init_cfg (obj:`mmcv.ConfigDict`, optional): The Config for
+            initialization. Default: None.
+        batch_first (bool, optional): Key, Query and Value are shape of
             (batch, n, embed_dim)
-            or (n, batch, embed_dim). Default to False.
+            or (n, batch, embed_dim). Defaults to False.
     """

     def __init__(self,
@@ -58,26 +59,26 @@ class GroupFree3DMHA(MultiheadAttention):
                 embed_dims]. Same in `nn.MultiheadAttention.forward`.
             key (Tensor): The key tensor with shape [num_keys, bs,
                 embed_dims]. Same in `nn.MultiheadAttention.forward`.
-                If None, the ``query`` will be used. Defaults to None.
+                If None, the ``query`` will be used.
             value (Tensor): The value tensor with same shape as `key`.
-                Same in `nn.MultiheadAttention.forward`. Defaults to None.
+                Same in `nn.MultiheadAttention.forward`.
                 If None, the `key` will be used.
             identity (Tensor): This tensor, with the same shape as x,
-                will be used for the identity link.
-                If None, `x` will be used. Defaults to None.
-            query_pos (Tensor): The positional encoding for query, with
-                the same shape as `x`. If not None, it will
-                be added to `x` before forward function. Defaults to None.
-            key_pos (Tensor): The positional encoding for `key`, with the
-                same shape as `key`. Defaults to None. If not None, it will
-                be added to `key` before forward function. If None, and
-                `query_pos` has the same shape as `key`, then `query_pos`
+                will be used for the identity link. If None, `x` will be used.
+            query_pos (Tensor, optional): The positional encoding for query,
+                with the same shape as `x`. Defaults to None.
+                If not None, it will be added to `x` before forward function.
+            key_pos (Tensor, optional): The positional encoding for `key`,
+                with the same shape as `key`. Defaults to None. If not None,
+                it will be added to `key` before forward function. If None,
+                and `query_pos` has the same shape as `key`, then `query_pos`
                 will be used for `key_pos`. Defaults to None.
-            attn_mask (Tensor): ByteTensor mask with shape [num_queries,
-                num_keys]. Same in `nn.MultiheadAttention.forward`.
-                Defaults to None.
-            key_padding_mask (Tensor): ByteTensor with shape [bs, num_keys].
-                Same in `nn.MultiheadAttention.forward`. Defaults to None.
+            attn_mask (Tensor, optional): ByteTensor mask with shape
+                [num_queries, num_keys].
+                Same in `nn.MultiheadAttention.forward`. Defaults to None.
+            key_padding_mask (Tensor, optional): ByteTensor with shape
+                [bs, num_keys]. Same in `nn.MultiheadAttention.forward`.
+                Defaults to None.

         Returns:
             Tensor: forwarded results with shape [num_queries, bs, embed_dims].
@@ -113,7 +114,7 @@ class ConvBNPositionalEncoding(nn.Module):
     Args:
         input_channel (int): input features dim.
-        num_pos_feats (int): output position features dim.
+        num_pos_feats (int, optional): output position features dim.
             Defaults to 288 to be consistent with seed features dim.
     """

@@ -131,7 +132,7 @@ class ConvBNPositionalEncoding(nn.Module):
             xyz (Tensor): (B, N, 3) the coordinates to embed.

         Returns:
-            Tensor: (B, num_pos_feats, N) the embeded position features.
+            Tensor: (B, num_pos_feats, N) the embedded position features.
         """
         xyz = xyz.permute(0, 2, 1)
         position_embedding = self.position_embedding_head(xyz)
...
@@ -14,22 +14,25 @@ class VoteModule(nn.Module):
     Args:
         in_channels (int): Number of channels of seed point features.
-        vote_per_seed (int): Number of votes generated from each seed point.
-        gt_per_seed (int): Number of ground truth votes generated
-            from each seed point.
-        num_points (int): Number of points to be used for voting.
-        conv_channels (tuple[int]): Out channels of vote
-            generating convolution.
-        conv_cfg (dict): Config of convolution.
+        vote_per_seed (int, optional): Number of votes generated from
+            each seed point. Default: 1.
+        gt_per_seed (int, optional): Number of ground truth votes generated
+            from each seed point. Default: 3.
+        num_points (int, optional): Number of points to be used for voting.
+            Default: 1.
+        conv_channels (tuple[int], optional): Out channels of vote
+            generating convolution. Default: (16, 16).
+        conv_cfg (dict, optional): Config of convolution.
             Default: dict(type='Conv1d').
-        norm_cfg (dict): Config of normalization.
+        norm_cfg (dict, optional): Config of normalization.
             Default: dict(type='BN1d').
-        norm_feats (bool): Whether to normalize features.
+        norm_feats (bool, optional): Whether to normalize features.
             Default: True.
-        with_res_feat (bool): Whether to predict residual features.
+        with_res_feat (bool, optional): Whether to predict residual features.
             Default: True.
-        vote_xyz_range (list[float], None): The range of points translation.
-        vote_loss (dict): Config of vote loss.
+        vote_xyz_range (list[float], optional):
+            The range of points translation. Default: None.
+        vote_loss (dict, optional): Config of vote loss. Default: None.
     """

     def __init__(self,
@@ -95,10 +98,10 @@ class VoteModule(nn.Module):
         Returns:
             tuple[torch.Tensor]:
-                - vote_points: Voted xyz based on the seed points \
+                - vote_points: Voted xyz based on the seed points
                     with shape (B, M, 3), ``M=num_seed*vote_per_seed``.
-                - vote_features: Voted features based on the seed points with \
-                    shape (B, C, M) where ``M=num_seed*vote_per_seed``, \
+                - vote_features: Voted features based on the seed points with
+                    shape (B, C, M) where ``M=num_seed*vote_per_seed``,
                     ``C=vote_feature_dim``.
         """
         if self.num_points != -1:
...
 # Copyright (c) OpenMMLab. All rights reserved.
 from mmdet.models.necks.fpn import FPN
+from .dla_neck import DLANeck
 from .imvoxel_neck import OutdoorImVoxelNeck
+from .pointnet2_fp_neck import PointNetFPNeck
 from .second_fpn import SECONDFPN

-__all__ = ['FPN', 'SECONDFPN', 'OutdoorImVoxelNeck']
+__all__ = [
+    'FPN', 'SECONDFPN', 'OutdoorImVoxelNeck', 'PointNetFPNeck', 'DLANeck'
+]
# Copyright (c) OpenMMLab. All rights reserved.
import math
import numpy as np
from mmcv.cnn import ConvModule, build_conv_layer
from mmcv.runner import BaseModule
from torch import nn as nn
from mmdet.models.builder import NECKS
def fill_up_weights(up):
"""Simulated bilinear upsampling kernel.
Args:
up (nn.Module): ConvTranspose2d module.
"""
w = up.weight.data
f = math.ceil(w.size(2) / 2)
c = (2 * f - 1 - f % 2) / (2. * f)
for i in range(w.size(2)):
for j in range(w.size(3)):
w[0, 0, i, j] = \
(1 - math.fabs(i / f - c)) * (1 - math.fabs(j / f - c))
for c in range(1, w.size(0)):
w[c, 0, :, :] = w[0, 0, :, :]
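# A quick sanity check for fill_up_weights, assuming only PyTorch (note that
# this file imports nn but not torch itself): with the kernel/stride/padding
# combination used by IDAUpsample below, the filled grouped deconv doubles
# the spatial size of its input.
#
#     import torch
#     up = nn.ConvTranspose2d(
#         8, 8, kernel_size=4, stride=2, padding=1, groups=8, bias=False)
#     fill_up_weights(up)
#     x = torch.randn(1, 8, 16, 16)
#     print(up(x).shape)  # torch.Size([1, 8, 32, 32])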
class IDAUpsample(BaseModule):
"""Iterative Deep Aggregation (IDA) Upsampling module to upsample features
of different scales to a similar scale.
Args:
out_channels (int): Number of output channels for DeformConv.
in_channels (List[int]): List of input channels of multi-scale
feature maps.
kernel_sizes (List[int]): List of size of the convolving
kernel of different scales.
norm_cfg (dict, optional): Config dict for normalization layer.
Default: None.
use_dcn (bool, optional): If True, use DCNv2. Default: True.
"""
def __init__(
self,
out_channels,
in_channels,
kernel_sizes,
norm_cfg=None,
use_dcn=True,
init_cfg=None,
):
super(IDAUpsample, self).__init__(init_cfg)
self.use_dcn = use_dcn
self.projs = nn.ModuleList()
self.ups = nn.ModuleList()
self.nodes = nn.ModuleList()
for i in range(1, len(in_channels)):
in_channel = in_channels[i]
up_kernel_size = int(kernel_sizes[i])
proj = ConvModule(
in_channel,
out_channels,
3,
padding=1,
bias=True,
conv_cfg=dict(type='DCNv2') if self.use_dcn else None,
norm_cfg=norm_cfg)
node = ConvModule(
out_channels,
out_channels,
3,
padding=1,
bias=True,
conv_cfg=dict(type='DCNv2') if self.use_dcn else None,
norm_cfg=norm_cfg)
up = build_conv_layer(
dict(type='deconv'),
out_channels,
out_channels,
up_kernel_size * 2,
stride=up_kernel_size,
padding=up_kernel_size // 2,
output_padding=0,
groups=out_channels,
bias=False)
self.projs.append(proj)
self.ups.append(up)
self.nodes.append(node)
def forward(self, mlvl_features, start_level, end_level):
"""Forward function.
Args:
mlvl_features (list[torch.Tensor]): Features from multiple layers.
start_level (int): Start layer for feature upsampling.
end_level (int): End layer for feature upsampling.
"""
for i in range(start_level, end_level - 1):
upsample = self.ups[i - start_level]
project = self.projs[i - start_level]
mlvl_features[i + 1] = upsample(project(mlvl_features[i + 1]))
node = self.nodes[i - start_level]
mlvl_features[i + 1] = node(mlvl_features[i + 1] +
mlvl_features[i])
class DLAUpsample(BaseModule):
"""Deep Layer Aggregation (DLA) Upsampling module for different scales
feature extraction, upsampling and fusion, It consists of groups of
IDAupsample modules.
Args:
start_level (int): The start layer.
channels (List[int]): List of input channels of multi-scale
feature maps.
scales(List[int]): List of scale of different layers' feature.
in_channels (NoneType, optional): List of input channels of
different scales. Default: None.
norm_cfg (dict, optional): Config dict for normalization layer.
Default: None.
use_dcn (bool, optional): Whether to use dcn in IDAup module.
Default: True.
"""
def __init__(self,
start_level,
channels,
scales,
in_channels=None,
norm_cfg=None,
use_dcn=True,
init_cfg=None):
super(DLAUpsample, self).__init__(init_cfg)
self.start_level = start_level
if in_channels is None:
in_channels = channels
self.channels = channels
channels = list(channels)
scales = np.array(scales, dtype=int)
for i in range(len(channels) - 1):
j = -i - 2
setattr(
self, 'ida_{}'.format(i),
IDAUpsample(channels[j], in_channels[j:],
scales[j:] // scales[j], norm_cfg, use_dcn))
scales[j + 1:] = scales[j]
in_channels[j + 1:] = [channels[j] for _ in channels[j + 1:]]
def forward(self, mlvl_features):
"""Forward function.
        Args:
            mlvl_features (list[torch.Tensor]): Features from multi-scale
                layers.

        Returns:
            list[torch.Tensor]: Up-sampled features of different layers.
"""
outs = [mlvl_features[-1]]
for i in range(len(mlvl_features) - self.start_level - 1):
ida = getattr(self, 'ida_{}'.format(i))
ida(mlvl_features, len(mlvl_features) - i - 2, len(mlvl_features))
outs.insert(0, mlvl_features[-1])
return outs
@NECKS.register_module()
class DLANeck(BaseModule):
"""DLA Neck.
Args:
in_channels (list[int], optional): List of input channels
of multi-scale feature map.
start_level (int, optional): The scale level where upsampling
starts. Default: 2.
end_level (int, optional): The scale level where upsampling
ends. Default: 5.
norm_cfg (dict, optional): Config dict for normalization
layer. Default: None.
use_dcn (bool, optional): Whether to use dcn in IDAup module.
Default: True.
"""
def __init__(self,
in_channels=[16, 32, 64, 128, 256, 512],
start_level=2,
end_level=5,
norm_cfg=None,
use_dcn=True,
init_cfg=None):
super(DLANeck, self).__init__(init_cfg)
self.start_level = start_level
self.end_level = end_level
scales = [2**i for i in range(len(in_channels[self.start_level:]))]
self.dla_up = DLAUpsample(
start_level=self.start_level,
channels=in_channels[self.start_level:],
scales=scales,
norm_cfg=norm_cfg,
use_dcn=use_dcn)
self.ida_up = IDAUpsample(
in_channels[self.start_level],
in_channels[self.start_level:self.end_level],
[2**i for i in range(self.end_level - self.start_level)], norm_cfg,
use_dcn)
def forward(self, x):
mlvl_features = [x[i] for i in range(len(x))]
mlvl_features = self.dla_up(mlvl_features)
outs = []
for i in range(self.end_level - self.start_level):
outs.append(mlvl_features[i].clone())
self.ida_up(outs, 0, len(outs))
return [outs[-1]]
def init_weights(self):
for m in self.modules():
if isinstance(m, nn.ConvTranspose2d):
# In order to be consistent with the source code,
# reset the ConvTranspose2d initialization parameters
m.reset_parameters()
# Simulated bilinear upsampling kernel
fill_up_weights(m)
elif isinstance(m, (nn.BatchNorm2d, nn.GroupNorm)):
nn.init.constant_(m.weight, 1)
nn.init.constant_(m.bias, 0)
elif isinstance(m, nn.Conv2d):
# In order to be consistent with the source code,
# reset the Conv2d initialization parameters
m.reset_parameters()
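A rough smoke test for DLANeck (channel and spatial layout are illustrative; use_dcn=False keeps the sketch runnable on CPU, since DCNv2 needs a compiled CUDA op):

import torch

from mmdet3d.models.necks import DLANeck

neck = DLANeck(
    in_channels=[16, 32, 64, 128, 256, 512],
    start_level=2,
    end_level=5,
    use_dcn=False)
# six DLA levels whose spatial size halves at each level
x = [torch.randn(1, c, 2**(9 - i), 2**(9 - i))
     for i, c in enumerate([16, 32, 64, 128, 256, 512])]
outs = neck(x)
print(outs[0].shape)  # torch.Size([1, 64, 128, 128])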
# Copyright (c) OpenMMLab. All rights reserved.
from mmcv.runner import BaseModule
from torch import nn as nn
from mmdet3d.ops import PointFPModule
from mmdet.models import NECKS
@NECKS.register_module()
class PointNetFPNeck(BaseModule):
r"""PointNet FP Module used in PointRCNN.
Refer to the `official code <https://github.com/charlesq34/pointnet2>`_.
.. code-block:: none
        sa_n ----------------------------------------
                                                     |
        ... ---------------------------------        |
                                            |        |
        sa_1 -------------                  |        |
                         |                  |        |
        sa_0 -> fp_0 -> fp_module -> fp_1 -> ... -> fp_module -> fp_n
sa_n including sa_xyz (torch.Tensor) and sa_features (torch.Tensor)
fp_n including fp_xyz (torch.Tensor) and fp_features (torch.Tensor)
Args:
fp_channels (tuple[tuple[int]]): Tuple of mlp channels in FP modules.
init_cfg (dict or list[dict], optional): Initialization config dict.
Default: None
"""
def __init__(self, fp_channels, init_cfg=None):
super(PointNetFPNeck, self).__init__(init_cfg=init_cfg)
self.num_fp = len(fp_channels)
self.FP_modules = nn.ModuleList()
for cur_fp_mlps in fp_channels:
self.FP_modules.append(PointFPModule(mlp_channels=cur_fp_mlps))
def _extract_input(self, feat_dict):
"""Extract inputs from features dictionary.
Args:
feat_dict (dict): Feature dict from backbone, which may contain
the following keys and values:
- sa_xyz (list[torch.Tensor]): Points of each sa module
in shape (N, 3).
- sa_features (list[torch.Tensor]): Output features of
each sa module in shape (N, M).
Returns:
list[torch.Tensor]: Coordinates of multiple levels of points.
list[torch.Tensor]: Features of multiple levels of points.
"""
sa_xyz = feat_dict['sa_xyz']
sa_features = feat_dict['sa_features']
assert len(sa_xyz) == len(sa_features)
return sa_xyz, sa_features
def forward(self, feat_dict):
"""Forward pass.
Args:
feat_dict (dict): Feature dict from backbone.
Returns:
dict[str, torch.Tensor]: Outputs of the Neck.
- fp_xyz (torch.Tensor): The coordinates of fp features.
- fp_features (torch.Tensor): The features from the last
feature propagation layers.
"""
sa_xyz, sa_features = self._extract_input(feat_dict)
fp_feature = sa_features[-1]
fp_xyz = sa_xyz[-1]
for i in range(self.num_fp):
# consume the points in a bottom-up manner
fp_feature = self.FP_modules[i](sa_xyz[-(i + 2)], sa_xyz[-(i + 1)],
sa_features[-(i + 2)], fp_feature)
fp_xyz = sa_xyz[-(i + 2)]
ret = dict(fp_xyz=fp_xyz, fp_features=fp_feature)
return ret
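A shape sketch for the new neck. The fp_channels below are hypothetical and sized to the concatenated skip + propagated feature channels (128+256 and 16+256); PointFPModule relies on mmdet3d's compiled three_nn/three_interpolate ops, so this needs a CUDA build:

import torch

from mmdet3d.models.necks import PointNetFPNeck

neck = PointNetFPNeck(fp_channels=((384, 256), (272, 128))).cuda()
feat_dict = dict(
    sa_xyz=[torch.randn(2, 1024, 3).cuda(),
            torch.randn(2, 256, 3).cuda(),
            torch.randn(2, 64, 3).cuda()],
    sa_features=[torch.randn(2, 16, 1024).cuda(),
                 torch.randn(2, 128, 256).cuda(),
                 torch.randn(2, 256, 64).cuda()])
out = neck(feat_dict)
print(out['fp_xyz'].shape, out['fp_features'].shape)
# torch.Size([2, 1024, 3]) torch.Size([2, 128, 1024])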
@@ -4,10 +4,11 @@ from .bbox_heads import PartA2BboxHead
 from .h3d_roi_head import H3DRoIHead
 from .mask_heads import PointwiseSemanticHead, PrimitiveHead
 from .part_aggregation_roi_head import PartAggregationROIHead
+from .point_rcnn_roi_head import PointRCNNRoIHead
 from .roi_extractors import Single3DRoIAwareExtractor, SingleRoIExtractor

 __all__ = [
     'Base3DRoIHead', 'PartAggregationROIHead', 'PointwiseSemanticHead',
     'Single3DRoIAwareExtractor', 'PartA2BboxHead', 'SingleRoIExtractor',
-    'H3DRoIHead', 'PrimitiveHead'
+    'H3DRoIHead', 'PrimitiveHead', 'PointRCNNRoIHead'
 ]
# Copyright (c) OpenMMLab. All rights reserved.
from abc import ABCMeta, abstractmethod

from mmcv.runner import BaseModule
...
@@ -5,9 +5,10 @@ from mmdet.models.roi_heads.bbox_heads import (BBoxHead, ConvFCBBoxHead,
                                                Shared4Conv1FCBBoxHead)
 from .h3d_bbox_head import H3DBboxHead
 from .parta2_bbox_head import PartA2BboxHead
+from .point_rcnn_bbox_head import PointRCNNBboxHead

 __all__ = [
     'BBoxHead', 'ConvFCBBoxHead', 'Shared2FCBBoxHead',
     'Shared4Conv1FCBBoxHead', 'DoubleConvFCBBoxHead', 'PartA2BboxHead',
-    'H3DBboxHead'
+    'H3DBboxHead', 'PointRCNNBboxHead'
 ]
@@ -20,7 +20,7 @@ class H3DBboxHead(BaseModule):
     Args:
         num_classes (int): The number of classes.
-        suface_matching_cfg (dict): Config for suface primitive matching.
+        surface_matching_cfg (dict): Config for surface primitive matching.
         line_matching_cfg (dict): Config for line primitive matching.
         bbox_coder (:obj:`BaseBBoxCoder`): Bbox coder for encoding and
             decoding boxes.

@@ -36,7 +36,7 @@ class H3DBboxHead(BaseModule):
         primitive_refine_channels (tuple[int]): Convolution channels of
             prediction layer.
         upper_thresh (float): Threshold for line matching.
-        surface_thresh (float): Threshold for suface matching.
+        surface_thresh (float): Threshold for surface matching.
         line_thresh (float): Threshold for line matching.
         conv_cfg (dict): Config of convolution in prediction layer.
         norm_cfg (dict): Config of BN in prediction layer.
@@ -324,16 +324,16 @@ class H3DBboxHead(BaseModule):
         Args:
             bbox_preds (dict): Predictions from forward of h3d bbox head.
             points (list[torch.Tensor]): Input points.
-            gt_bboxes_3d (list[:obj:`BaseInstance3DBoxes`]): Ground truth \
+            gt_bboxes_3d (list[:obj:`BaseInstance3DBoxes`]): Ground truth
                 bboxes of each sample.
             gt_labels_3d (list[torch.Tensor]): Labels of each sample.
-            pts_semantic_mask (None | list[torch.Tensor]): Point-wise
+            pts_semantic_mask (list[torch.Tensor]): Point-wise
                 semantic mask.
-            pts_instance_mask (None | list[torch.Tensor]): Point-wise
+            pts_instance_mask (list[torch.Tensor]): Point-wise
                 instance mask.
             img_metas (list[dict]): Contain pcd and img's meta info.
             rpn_targets (tuple): Targets generated by rpn head.
-            gt_bboxes_ignore (None | list[torch.Tensor]): Specify
+            gt_bboxes_ignore (list[torch.Tensor]): Specify
                 which bounding boxes to ignore.

         Returns:
@@ -502,7 +502,7 @@ class H3DBboxHead(BaseModule):
             box_dim=bbox.shape[-1],
             with_yaw=self.bbox_coder.with_rot,
             origin=(0.5, 0.5, 0.5))
-        box_indices = bbox.points_in_boxes(points)
+        box_indices = bbox.points_in_boxes_all(points)
         corner3d = bbox.corners
         minmax_box3d = corner3d.new(torch.Size((corner3d.shape[0], 6)))
@@ -560,25 +560,25 @@ class H3DBboxHead(BaseModule):
         Args:
             bbox_preds (dict): Predictions from forward of vote head.
-            size_class_targets (torch.Tensor): Ground truth \
+            size_class_targets (torch.Tensor): Ground truth
                 size class of each prediction bounding box.
-            size_res_targets (torch.Tensor): Ground truth \
+            size_res_targets (torch.Tensor): Ground truth
                 size residual of each prediction bounding box.
-            dir_class_targets (torch.Tensor): Ground truth \
+            dir_class_targets (torch.Tensor): Ground truth
                 direction class of each prediction bounding box.
-            dir_res_targets (torch.Tensor): Ground truth \
+            dir_res_targets (torch.Tensor): Ground truth
                 direction residual of each prediction bounding box.
-            center_targets (torch.Tensor): Ground truth center \
+            center_targets (torch.Tensor): Ground truth center
                 of each prediction bounding box.
-            mask_targets (torch.Tensor): Validation of each \
+            mask_targets (torch.Tensor): Validation of each
                 prediction bounding box.
-            objectness_targets (torch.Tensor): Ground truth \
+            objectness_targets (torch.Tensor): Ground truth
                 objectness label of each prediction bounding box.
-            objectness_weights (torch.Tensor): Weights of objectness \
+            objectness_weights (torch.Tensor): Weights of objectness
                 loss for each prediction bounding box.
-            box_loss_weights (torch.Tensor): Weights of regression \
+            box_loss_weights (torch.Tensor): Weights of regression
                 loss for each prediction bounding box.
-            valid_gt_weights (torch.Tensor): Validation of each \
+            valid_gt_weights (torch.Tensor): Validation of each
                 ground truth bounding box.

         Returns:
@@ -663,12 +663,12 @@ class H3DBboxHead(BaseModule):
         Args:
             points (list[torch.Tensor]): Points of each batch.
-            gt_bboxes_3d (list[:obj:`BaseInstance3DBoxes`]): Ground truth \
+            gt_bboxes_3d (list[:obj:`BaseInstance3DBoxes`]): Ground truth
                 bboxes of each batch.
             gt_labels_3d (list[torch.Tensor]): Labels of each batch.
-            pts_semantic_mask (None | list[torch.Tensor]): Point-wise semantic
+            pts_semantic_mask (list[torch.Tensor]): Point-wise semantic
                 label of each batch.
-            pts_instance_mask (None | list[torch.Tensor]): Point-wise instance
+            pts_instance_mask (list[torch.Tensor]): Point-wise instance
                 label of each batch.
             bbox_preds (torch.Tensor): Bounding box predictions of vote head.
@@ -769,22 +769,22 @@ class H3DBboxHead(BaseModule):
         Args:
             points (torch.Tensor): Points of each batch.
-            gt_bboxes_3d (:obj:`BaseInstance3DBoxes`): Ground truth \
+            gt_bboxes_3d (:obj:`BaseInstance3DBoxes`): Ground truth
                 boxes of each batch.
             gt_labels_3d (torch.Tensor): Labels of each batch.
-            pts_semantic_mask (None | torch.Tensor): Point-wise semantic
+            pts_semantic_mask (torch.Tensor): Point-wise semantic
                 label of each batch.
-            pts_instance_mask (None | torch.Tensor): Point-wise instance
+            pts_instance_mask (torch.Tensor): Point-wise instance
                 label of each batch.
             aggregated_points (torch.Tensor): Aggregated points from
                 vote aggregation layer.
             pred_surface_center (torch.Tensor): Prediction of surface center.
             pred_line_center (torch.Tensor): Prediction of line center.
-            pred_obj_surface_center (torch.Tensor): Objectness prediction \
+            pred_obj_surface_center (torch.Tensor): Objectness prediction
                 of surface center.
-            pred_obj_line_center (torch.Tensor): Objectness prediction of \
+            pred_obj_line_center (torch.Tensor): Objectness prediction of
                 line center.
-            pred_surface_sem (torch.Tensor): Semantic prediction of \
+            pred_surface_sem (torch.Tensor): Semantic prediction of
                 surface center.
             pred_line_sem (torch.Tensor): Semantic prediction of line center.

         Returns:
...
@@ -285,7 +285,7 @@ class PartA2BboxHead(BaseModule):
     def loss(self, cls_score, bbox_pred, rois, labels, bbox_targets,
              pos_gt_bboxes, reg_mask, label_weights, bbox_weights):
-        """Coumputing losses.
+        """Computing losses.

         Args:
             cls_score (torch.Tensor): Scores of each roi.
@@ -344,7 +344,7 @@ class PartA2BboxHead(BaseModule):
         pred_boxes3d[..., 0:3] = rotation_3d_in_axis(
             pred_boxes3d[..., 0:3].unsqueeze(1),
-            (pos_rois_rotation + np.pi / 2),
+            pos_rois_rotation,
             axis=2).squeeze(1)

         pred_boxes3d[:, 0:3] += roi_xyz
@@ -436,8 +436,7 @@ class PartA2BboxHead(BaseModule):
         pos_gt_bboxes_ct[..., 0:3] -= roi_center
         pos_gt_bboxes_ct[..., 6] -= roi_ry
         pos_gt_bboxes_ct[..., 0:3] = rotation_3d_in_axis(
-            pos_gt_bboxes_ct[..., 0:3].unsqueeze(1),
-            -(roi_ry + np.pi / 2),
+            pos_gt_bboxes_ct[..., 0:3].unsqueeze(1), -roi_ry,
             axis=2).squeeze(1)

         # flip orientation if rois have opposite orientation
@@ -462,12 +461,13 @@ class PartA2BboxHead(BaseModule):
         return (label, bbox_targets, pos_gt_bboxes, reg_mask, label_weights,
                 bbox_weights)

-    def get_corner_loss_lidar(self, pred_bbox3d, gt_bbox3d, delta=1):
+    def get_corner_loss_lidar(self, pred_bbox3d, gt_bbox3d, delta=1.0):
         """Calculate corner loss of given boxes.

         Args:
             pred_bbox3d (torch.FloatTensor): Predicted boxes in shape (N, 7).
             gt_bbox3d (torch.FloatTensor): Ground truth boxes in shape (N, 7).
+            delta (float, optional): Huber loss threshold. Defaults to 1.0.

         Returns:
             torch.FloatTensor: Calculated corner loss in shape (N).
@@ -490,8 +490,8 @@ class PartA2BboxHead(BaseModule):
             torch.norm(pred_box_corners - gt_box_corners_flip,
                        dim=2))  # (N, 8)
         # huber loss
-        abs_error = torch.abs(corner_dist)
-        quadratic = torch.clamp(abs_error, max=delta)
+        abs_error = corner_dist.abs()
+        quadratic = abs_error.clamp(max=delta)
         linear = (abs_error - quadratic)
         corner_loss = 0.5 * quadratic**2 + delta * linear
@@ -530,8 +530,7 @@ class PartA2BboxHead(BaseModule):
         local_roi_boxes[..., 0:3] = 0
         rcnn_boxes3d = self.bbox_coder.decode(local_roi_boxes, bbox_pred)
         rcnn_boxes3d[..., 0:3] = rotation_3d_in_axis(
-            rcnn_boxes3d[..., 0:3].unsqueeze(1), (roi_ry + np.pi / 2),
-            axis=2).squeeze(1)
+            rcnn_boxes3d[..., 0:3].unsqueeze(1), roi_ry, axis=2).squeeze(1)
         rcnn_boxes3d[:, 0:3] += roi_xyz

         # post processing
@@ -542,13 +541,13 @@ class PartA2BboxHead(BaseModule):
             cur_box_prob = class_pred[batch_id]
             cur_rcnn_boxes3d = rcnn_boxes3d[roi_batch_id == batch_id]
-            selected = self.multi_class_nms(cur_box_prob, cur_rcnn_boxes3d,
-                                            cfg.score_thr, cfg.nms_thr,
-                                            img_metas[batch_id],
-                                            cfg.use_rotate_nms)
-            selected_bboxes = cur_rcnn_boxes3d[selected]
-            selected_label_preds = cur_class_labels[selected]
-            selected_scores = cur_cls_score[selected]
+            keep = self.multi_class_nms(cur_box_prob, cur_rcnn_boxes3d,
+                                        cfg.score_thr, cfg.nms_thr,
+                                        img_metas[batch_id],
+                                        cfg.use_rotate_nms)
+            selected_bboxes = cur_rcnn_boxes3d[keep]
+            selected_label_preds = cur_class_labels[keep]
+            selected_scores = cur_cls_score[keep]
             result_list.append(
                 (img_metas[batch_id]['box_type_3d'](selected_bboxes,
@@ -576,7 +575,7 @@ class PartA2BboxHead(BaseModule):
             box_preds (torch.Tensor): Predicted boxes in shape (N, 7+C).
             score_thr (float): Threshold of scores.
             nms_thr (float): Threshold for NMS.
-            input_meta (dict): Meta informations of the current sample.
+            input_meta (dict): Meta information of the current sample.
             use_rotate_nms (bool, optional): Whether to use rotated nms.
                 Defaults to True.
@@ -620,6 +619,6 @@ class PartA2BboxHead(BaseModule):
                     dtype=torch.int64,
                     device=box_preds.device))

-        selected = torch.cat(
+        keep = torch.cat(
             selected_list, dim=0) if len(selected_list) > 0 else []
-        return selected
+        return keep
# Copyright (c) OpenMMLab. All rights reserved.
import numpy as np
import torch
from mmcv.cnn import ConvModule, normal_init
from mmcv.cnn.bricks import build_conv_layer
from mmcv.runner import BaseModule
from torch import nn as nn
from mmdet3d.core.bbox.structures import (LiDARInstance3DBoxes,
rotation_3d_in_axis, xywhr2xyxyr)
from mmdet3d.models.builder import build_loss
from mmdet3d.ops import build_sa_module
from mmdet3d.ops.iou3d.iou3d_utils import nms_gpu, nms_normal_gpu
from mmdet.core import build_bbox_coder, multi_apply
from mmdet.models import HEADS
@HEADS.register_module()
class PointRCNNBboxHead(BaseModule):
"""PointRCNN RoI Bbox head.
    Args:
        num_classes (int): The number of classes to predict.
        in_channels (int): Input channels of point features.
        mlp_channels (list[int]): The number of mlp channels.
        pred_layer_cfg (dict, optional): Config of classification and
            regression prediction layers. Defaults to None.
num_points (tuple, optional): The number of points which each SA
module samples. Defaults to (128, 32, -1).
radius (tuple, optional): Sampling radius of each SA module.
Defaults to (0.2, 0.4, 100).
num_samples (tuple, optional): The number of samples for ball query
in each SA module. Defaults to (64, 64, 64).
sa_channels (tuple, optional): Out channels of each mlp in SA module.
Defaults to ((128, 128, 128), (128, 128, 256), (256, 256, 512)).
bbox_coder (dict, optional): Config dict of box coders.
Defaults to dict(type='DeltaXYZWLHRBBoxCoder').
sa_cfg (dict, optional): Config of set abstraction module, which may
contain the following keys and values:
- pool_mod (str): Pool method ('max' or 'avg') for SA modules.
- use_xyz (bool): Whether to use xyz as a part of features.
- normalize_xyz (bool): Whether to normalize xyz with radii in
each SA module.
Defaults to dict(type='PointSAModule', pool_mod='max',
use_xyz=True).
conv_cfg (dict, optional): Config dict of convolutional layers.
Defaults to dict(type='Conv1d').
norm_cfg (dict, optional): Config dict of normalization layers.
Defaults to dict(type='BN1d').
act_cfg (dict, optional): Config dict of activation layers.
Defaults to dict(type='ReLU').
bias (str, optional): Type of bias. Defaults to 'auto'.
loss_bbox (dict, optional): Config of regression loss function.
Defaults to dict(type='SmoothL1Loss', beta=1.0 / 9.0,
reduction='sum', loss_weight=1.0).
loss_cls (dict, optional): Config of classification loss function.
Defaults to dict(type='CrossEntropyLoss', use_sigmoid=True,
reduction='sum', loss_weight=1.0).
with_corner_loss (bool, optional): Whether using corner loss.
Defaults to True.
init_cfg (dict, optional): Config of initialization. Defaults to None.
"""
def __init__(
self,
num_classes,
in_channels,
mlp_channels,
pred_layer_cfg=None,
num_points=(128, 32, -1),
radius=(0.2, 0.4, 100),
num_samples=(64, 64, 64),
sa_channels=((128, 128, 128), (128, 128, 256), (256, 256, 512)),
bbox_coder=dict(type='DeltaXYZWLHRBBoxCoder'),
sa_cfg=dict(type='PointSAModule', pool_mod='max', use_xyz=True),
conv_cfg=dict(type='Conv1d'),
norm_cfg=dict(type='BN1d'),
act_cfg=dict(type='ReLU'),
bias='auto',
loss_bbox=dict(
type='SmoothL1Loss',
beta=1.0 / 9.0,
reduction='sum',
loss_weight=1.0),
loss_cls=dict(
type='CrossEntropyLoss',
use_sigmoid=True,
reduction='sum',
loss_weight=1.0),
with_corner_loss=True,
init_cfg=None):
super(PointRCNNBboxHead, self).__init__(init_cfg=init_cfg)
self.num_classes = num_classes
self.num_sa = len(sa_channels)
self.with_corner_loss = with_corner_loss
self.conv_cfg = conv_cfg
self.norm_cfg = norm_cfg
self.act_cfg = act_cfg
self.bias = bias
self.loss_bbox = build_loss(loss_bbox)
self.loss_cls = build_loss(loss_cls)
self.bbox_coder = build_bbox_coder(bbox_coder)
self.use_sigmoid_cls = loss_cls.get('use_sigmoid', False)
self.in_channels = in_channels
mlp_channels = [self.in_channels] + mlp_channels
shared_mlps = nn.Sequential()
for i in range(len(mlp_channels) - 1):
shared_mlps.add_module(
f'layer{i}',
ConvModule(
mlp_channels[i],
mlp_channels[i + 1],
kernel_size=(1, 1),
stride=(1, 1),
inplace=False,
conv_cfg=dict(type='Conv2d')))
self.xyz_up_layer = nn.Sequential(*shared_mlps)
c_out = mlp_channels[-1]
self.merge_down_layer = ConvModule(
c_out * 2,
c_out,
kernel_size=(1, 1),
stride=(1, 1),
inplace=False,
conv_cfg=dict(type='Conv2d'))
pre_channels = c_out
self.SA_modules = nn.ModuleList()
sa_in_channel = pre_channels
for sa_index in range(self.num_sa):
cur_sa_mlps = list(sa_channels[sa_index])
cur_sa_mlps = [sa_in_channel] + cur_sa_mlps
sa_out_channel = cur_sa_mlps[-1]
cur_num_points = num_points[sa_index]
if cur_num_points <= 0:
cur_num_points = None
self.SA_modules.append(
build_sa_module(
num_point=cur_num_points,
radius=radius[sa_index],
num_sample=num_samples[sa_index],
mlp_channels=cur_sa_mlps,
cfg=sa_cfg))
sa_in_channel = sa_out_channel
self.cls_convs = self._add_conv_branch(
pred_layer_cfg.in_channels, pred_layer_cfg.cls_conv_channels)
self.reg_convs = self._add_conv_branch(
pred_layer_cfg.in_channels, pred_layer_cfg.reg_conv_channels)
prev_channel = pred_layer_cfg.cls_conv_channels[-1]
self.conv_cls = build_conv_layer(
self.conv_cfg,
in_channels=prev_channel,
out_channels=self.num_classes,
kernel_size=1)
prev_channel = pred_layer_cfg.reg_conv_channels[-1]
self.conv_reg = build_conv_layer(
self.conv_cfg,
in_channels=prev_channel,
out_channels=self.bbox_coder.code_size * self.num_classes,
kernel_size=1)
if init_cfg is None:
self.init_cfg = dict(type='Xavier', layer=['Conv2d', 'Conv1d'])
def _add_conv_branch(self, in_channels, conv_channels):
"""Add shared or separable branch.
Args:
in_channels (int): Input feature channel.
conv_channels (tuple): Middle feature channels.
"""
conv_spec = [in_channels] + list(conv_channels)
# add branch specific conv layers
conv_layers = nn.Sequential()
for i in range(len(conv_spec) - 1):
conv_layers.add_module(
f'layer{i}',
ConvModule(
conv_spec[i],
conv_spec[i + 1],
kernel_size=1,
padding=0,
conv_cfg=self.conv_cfg,
norm_cfg=self.norm_cfg,
act_cfg=self.act_cfg,
bias=self.bias,
inplace=True))
return conv_layers
def init_weights(self):
"""Initialize weights of the head."""
super().init_weights()
for m in self.modules():
if isinstance(m, nn.Conv2d) or isinstance(m, nn.Conv1d):
if m.bias is not None:
nn.init.constant_(m.bias, 0)
normal_init(self.conv_reg.weight, mean=0, std=0.001)
def forward(self, feats):
"""Forward pass.
Args:
            feats (torch.Tensor): Features from RCNN modules.
Returns:
tuple[torch.Tensor]: Score of class and bbox predictions.
"""
input_data = feats.clone().detach()
xyz_input = input_data[..., 0:self.in_channels].transpose(
1, 2).unsqueeze(dim=3).contiguous().clone().detach()
xyz_features = self.xyz_up_layer(xyz_input)
rpn_features = input_data[..., self.in_channels:].transpose(
1, 2).unsqueeze(dim=3)
merged_features = torch.cat((xyz_features, rpn_features), dim=1)
merged_features = self.merge_down_layer(merged_features)
l_xyz, l_features = [input_data[..., 0:3].contiguous()], \
[merged_features.squeeze(dim=3)]
for i in range(len(self.SA_modules)):
li_xyz, li_features, cur_indices = \
self.SA_modules[i](l_xyz[i], l_features[i])
l_xyz.append(li_xyz)
l_features.append(li_features)
shared_features = l_features[-1]
x_cls = shared_features
x_reg = shared_features
x_cls = self.cls_convs(x_cls)
rcnn_cls = self.conv_cls(x_cls)
x_reg = self.reg_convs(x_reg)
rcnn_reg = self.conv_reg(x_reg)
rcnn_cls = rcnn_cls.transpose(1, 2).contiguous().squeeze(dim=1)
rcnn_reg = rcnn_reg.transpose(1, 2).contiguous().squeeze(dim=1)
return (rcnn_cls, rcnn_reg)
def loss(self, cls_score, bbox_pred, rois, labels, bbox_targets,
pos_gt_bboxes, reg_mask, label_weights, bbox_weights):
"""Computing losses.
Args:
cls_score (torch.Tensor): Scores of each RoI.
bbox_pred (torch.Tensor): Predictions of bboxes.
rois (torch.Tensor): RoI bboxes.
labels (torch.Tensor): Labels of class.
bbox_targets (torch.Tensor): Target of positive bboxes.
pos_gt_bboxes (torch.Tensor): Ground truths of positive bboxes.
reg_mask (torch.Tensor): Mask for positive bboxes.
label_weights (torch.Tensor): Weights of class loss.
bbox_weights (torch.Tensor): Weights of bbox loss.
Returns:
dict: Computed losses.
- loss_cls (torch.Tensor): Loss of classes.
- loss_bbox (torch.Tensor): Loss of bboxes.
- loss_corner (torch.Tensor): Loss of corners.
"""
losses = dict()
rcnn_batch_size = cls_score.shape[0]
# calculate class loss
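        # `labels` are the soft IoU-based targets built in
        # `_get_target_single`, so this effectively supervises an
        # IoU-quality score rather than a hard class label.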
cls_flat = cls_score.view(-1)
loss_cls = self.loss_cls(cls_flat, labels, label_weights)
losses['loss_cls'] = loss_cls
# calculate regression loss
code_size = self.bbox_coder.code_size
pos_inds = (reg_mask > 0)
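        # Only positive RoIs carry regression targets, which are encoded
        # in each RoI's canonical frame (see `_get_target_single`).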
pos_bbox_pred = bbox_pred.view(rcnn_batch_size, -1)[pos_inds].clone()
bbox_weights_flat = bbox_weights[pos_inds].view(-1, 1).repeat(
1, pos_bbox_pred.shape[-1])
loss_bbox = self.loss_bbox(
pos_bbox_pred.unsqueeze(dim=0),
bbox_targets.unsqueeze(dim=0).detach(),
bbox_weights_flat.unsqueeze(dim=0))
losses['loss_bbox'] = loss_bbox
        if pos_inds.any() and self.with_corner_loss:
rois = rois.detach()
pos_roi_boxes3d = rois[..., 1:].view(-1, code_size)[pos_inds]
pos_roi_boxes3d = pos_roi_boxes3d.view(-1, code_size)
batch_anchors = pos_roi_boxes3d.clone().detach()
pos_rois_rotation = pos_roi_boxes3d[..., 6].view(-1)
roi_xyz = pos_roi_boxes3d[..., 0:3].view(-1, 3)
batch_anchors[..., 0:3] = 0
# decode boxes
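            # (predictions were regressed against origin-centered RoIs,
            # so decode there, then rotate by the RoI yaw and translate
            # back into the LiDAR frame before comparing corners)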
pred_boxes3d = self.bbox_coder.decode(
batch_anchors,
pos_bbox_pred.view(-1, code_size)).view(-1, code_size)
pred_boxes3d[..., 0:3] = rotation_3d_in_axis(
                pred_boxes3d[..., 0:3].unsqueeze(1), pos_rois_rotation,
                axis=2).squeeze(1)
pred_boxes3d[:, 0:3] += roi_xyz
# calculate corner loss
loss_corner = self.get_corner_loss_lidar(pred_boxes3d,
pos_gt_bboxes)
losses['loss_corner'] = loss_corner
else:
losses['loss_corner'] = loss_cls.new_tensor(0)
return losses
def get_corner_loss_lidar(self, pred_bbox3d, gt_bbox3d, delta=1.0):
"""Calculate corner loss of given boxes.
Args:
pred_bbox3d (torch.FloatTensor): Predicted boxes in shape (N, 7).
gt_bbox3d (torch.FloatTensor): Ground truth boxes in shape (N, 7).
            delta (float, optional): Huber loss threshold. Defaults to 1.0.
        Returns:
            torch.FloatTensor: Calculated corner loss in shape (N,).
"""
assert pred_bbox3d.shape[0] == gt_bbox3d.shape[0]
# This is a little bit hack here because we assume the box for
# PointRCNN is in LiDAR coordinates
gt_boxes_structure = LiDARInstance3DBoxes(gt_bbox3d)
pred_box_corners = LiDARInstance3DBoxes(pred_bbox3d).corners
gt_box_corners = gt_boxes_structure.corners
# This flip only changes the heading direction of GT boxes
gt_bbox3d_flip = gt_boxes_structure.clone()
gt_bbox3d_flip.tensor[:, 6] += np.pi
gt_box_corners_flip = gt_bbox3d_flip.corners
corner_dist = torch.min(
torch.norm(pred_box_corners - gt_box_corners, dim=2),
torch.norm(pred_box_corners - gt_box_corners_flip, dim=2))
# huber loss
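        # piecewise: 0.5 * d^2 for d <= delta,
        # delta * (d - 0.5 * delta) otherwise
        # e.g. with delta=1: d=0.5 -> 0.125, d=2 -> 1.5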
abs_error = corner_dist.abs()
quadratic = abs_error.clamp(max=delta)
linear = (abs_error - quadratic)
corner_loss = 0.5 * quadratic**2 + delta * linear
return corner_loss.mean(dim=1)
def get_targets(self, sampling_results, rcnn_train_cfg, concat=True):
"""Generate targets.
Args:
sampling_results (list[:obj:`SamplingResult`]):
Sampled results from rois.
rcnn_train_cfg (:obj:`ConfigDict`): Training config of rcnn.
concat (bool, optional): Whether to concatenate targets between
batches. Defaults to True.
Returns:
tuple[torch.Tensor]: Targets of boxes and class prediction.
"""
pos_bboxes_list = [res.pos_bboxes for res in sampling_results]
pos_gt_bboxes_list = [res.pos_gt_bboxes for res in sampling_results]
iou_list = [res.iou for res in sampling_results]
targets = multi_apply(
self._get_target_single,
pos_bboxes_list,
pos_gt_bboxes_list,
iou_list,
cfg=rcnn_train_cfg)
(label, bbox_targets, pos_gt_bboxes, reg_mask, label_weights,
bbox_weights) = targets
if concat:
label = torch.cat(label, 0)
bbox_targets = torch.cat(bbox_targets, 0)
pos_gt_bboxes = torch.cat(pos_gt_bboxes, 0)
reg_mask = torch.cat(reg_mask, 0)
label_weights = torch.cat(label_weights, 0)
label_weights /= torch.clamp(label_weights.sum(), min=1.0)
bbox_weights = torch.cat(bbox_weights, 0)
bbox_weights /= torch.clamp(bbox_weights.sum(), min=1.0)
return (label, bbox_targets, pos_gt_bboxes, reg_mask, label_weights,
bbox_weights)
def _get_target_single(self, pos_bboxes, pos_gt_bboxes, ious, cfg):
"""Generate training targets for a single sample.
Args:
pos_bboxes (torch.Tensor): Positive boxes with shape
(N, 7).
pos_gt_bboxes (torch.Tensor): Ground truth boxes with shape
(M, 7).
            ious (torch.Tensor): IoU between each sampled box and its
                assigned ground truth, in shape (N,).
cfg (dict): Training configs.
Returns:
tuple[torch.Tensor]: Target for positive boxes.
(label, bbox_targets, pos_gt_bboxes, reg_mask, label_weights,
bbox_weights)
"""
cls_pos_mask = ious > cfg.cls_pos_thr
cls_neg_mask = ious < cfg.cls_neg_thr
interval_mask = (cls_pos_mask == 0) & (cls_neg_mask == 0)
# iou regression target
label = (cls_pos_mask > 0).float()
label[interval_mask] = (ious[interval_mask] - cfg.cls_neg_thr) / \
(cfg.cls_pos_thr - cfg.cls_neg_thr)
# label weights
label_weights = (label >= 0).float()
# box regression target
reg_mask = pos_bboxes.new_zeros(ious.size(0)).long()
reg_mask[0:pos_gt_bboxes.size(0)] = 1
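        # This assumes the sampler orders RoIs with the matched
        # positives first, so exactly the first pos_gt_bboxes.size(0)
        # entries receive a regression target.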
bbox_weights = (reg_mask > 0).float()
if reg_mask.bool().any():
pos_gt_bboxes_ct = pos_gt_bboxes.clone().detach()
roi_center = pos_bboxes[..., 0:3]
roi_ry = pos_bboxes[..., 6] % (2 * np.pi)
# canonical transformation
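            # (translate GT boxes to the RoI center, then rotate by
            # -yaw so the RoI heading aligns with the x-axis)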
pos_gt_bboxes_ct[..., 0:3] -= roi_center
pos_gt_bboxes_ct[..., 6] -= roi_ry
pos_gt_bboxes_ct[..., 0:3] = rotation_3d_in_axis(
                pos_gt_bboxes_ct[..., 0:3].unsqueeze(1), -roi_ry,
axis=2).squeeze(1)
# flip orientation if gt have opposite orientation
ry_label = pos_gt_bboxes_ct[..., 6] % (2 * np.pi) # 0 ~ 2pi
is_opposite = (ry_label > np.pi * 0.5) & (ry_label < np.pi * 1.5)
ry_label[is_opposite] = (ry_label[is_opposite] + np.pi) % (
2 * np.pi) # (0 ~ pi/2, 3pi/2 ~ 2pi)
flag = ry_label > np.pi
ry_label[flag] = ry_label[flag] - np.pi * 2 # (-pi/2, pi/2)
ry_label = torch.clamp(ry_label, min=-np.pi / 2, max=np.pi / 2)
pos_gt_bboxes_ct[..., 6] = ry_label
rois_anchor = pos_bboxes.clone().detach()
rois_anchor[:, 0:3] = 0
rois_anchor[:, 6] = 0
bbox_targets = self.bbox_coder.encode(rois_anchor,
pos_gt_bboxes_ct)
else:
# no fg bbox
bbox_targets = pos_gt_bboxes.new_empty((0, 7))
return (label, bbox_targets, pos_gt_bboxes, reg_mask, label_weights,
bbox_weights)
def get_bboxes(self,
rois,
cls_score,
bbox_pred,
class_labels,
img_metas,
cfg=None):
"""Generate bboxes from bbox head predictions.
Args:
rois (torch.Tensor): RoI bounding boxes.
cls_score (torch.Tensor): Scores of bounding boxes.
            bbox_pred (torch.Tensor): Bounding box predictions.
            class_labels (torch.Tensor): Labels of classes.
img_metas (list[dict]): Point cloud and image's meta info.
cfg (:obj:`ConfigDict`, optional): Testing config.
Defaults to None.
Returns:
list[tuple]: Decoded bbox, scores and labels after nms.
"""
roi_batch_id = rois[..., 0]
roi_boxes = rois[..., 1:] # boxes without batch id
batch_size = int(roi_batch_id.max().item() + 1)
# decode boxes
roi_ry = roi_boxes[..., 6].view(-1)
roi_xyz = roi_boxes[..., 0:3].view(-1, 3)
local_roi_boxes = roi_boxes.clone().detach()
local_roi_boxes[..., 0:3] = 0
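        # Decode against origin-centered RoIs, then rotate by the RoI
        # yaw and translate back to global LiDAR coordinates.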
rcnn_boxes3d = self.bbox_coder.decode(local_roi_boxes, bbox_pred)
        rcnn_boxes3d[..., 0:3] = rotation_3d_in_axis(
            rcnn_boxes3d[..., 0:3].unsqueeze(1), roi_ry, axis=2).squeeze(1)
rcnn_boxes3d[:, 0:3] += roi_xyz
# post processing
result_list = []
for batch_id in range(batch_size):
cur_class_labels = class_labels[batch_id]
            cur_cls_score = cls_score[roi_batch_id == batch_id].view(-1)
            cur_box_prob = cur_cls_score.unsqueeze(1)
cur_rcnn_boxes3d = rcnn_boxes3d[roi_batch_id == batch_id]
keep = self.multi_class_nms(cur_box_prob, cur_rcnn_boxes3d,
cfg.score_thr, cfg.nms_thr,
img_metas[batch_id],
cfg.use_rotate_nms)
selected_bboxes = cur_rcnn_boxes3d[keep]
selected_label_preds = cur_class_labels[keep]
selected_scores = cur_cls_score[keep]
result_list.append(
(img_metas[batch_id]['box_type_3d'](selected_bboxes,
self.bbox_coder.code_size),
selected_scores, selected_label_preds))
return result_list
def multi_class_nms(self,
box_probs,
box_preds,
score_thr,
nms_thr,
input_meta,
use_rotate_nms=True):
"""Multi-class NMS for box head.
Note:
This function has large overlap with the `box3d_multiclass_nms`
implemented in `mmdet3d.core.post_processing`. We are considering
merging these two functions in the future.
Args:
            box_probs (torch.Tensor): Predicted box probabilities in
                shape (N, num_classes).
box_preds (torch.Tensor): Predicted boxes in shape (N, 7+C).
score_thr (float): Threshold of scores.
nms_thr (float): Threshold for NMS.
input_meta (dict): Meta information of the current sample.
use_rotate_nms (bool, optional): Whether to use rotated nms.
Defaults to True.
Returns:
torch.Tensor: Selected indices.
"""
if use_rotate_nms:
nms_func = nms_gpu
else:
nms_func = nms_normal_gpu
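        # Rotated NMS suppresses overlaps between BEV boxes with
        # heading; the "normal" variant treats them as axis-aligned.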
assert box_probs.shape[
1] == self.num_classes, f'box_probs shape: {str(box_probs.shape)}'
selected_list = []
selected_labels = []
boxes_for_nms = xywhr2xyxyr(input_meta['box_type_3d'](
box_preds, self.bbox_coder.code_size).bev)
        score_thresh = score_thr if isinstance(
            score_thr, list) else [score_thr] * self.num_classes
        nms_thresh = nms_thr if isinstance(
            nms_thr, list) else [nms_thr] * self.num_classes
        for k in range(self.num_classes):
class_scores_keep = box_probs[:, k] >= score_thresh[k]
if class_scores_keep.int().sum() > 0:
original_idxs = class_scores_keep.nonzero(
as_tuple=False).view(-1)
cur_boxes_for_nms = boxes_for_nms[class_scores_keep]
cur_rank_scores = box_probs[class_scores_keep, k]
cur_selected = nms_func(cur_boxes_for_nms, cur_rank_scores,
nms_thresh[k])
if cur_selected.shape[0] == 0:
continue
selected_list.append(original_idxs[cur_selected])
selected_labels.append(
torch.full([cur_selected.shape[0]],
k + 1,
dtype=torch.int64,
device=box_preds.device))
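                # NOTE: `selected_labels` (1-based class ids) is only
                # bookkeeping here; this function returns `keep` alone.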
        keep = torch.cat(selected_list, dim=0) if len(
            selected_list) > 0 else box_probs.new_empty(0, dtype=torch.long)
return keep
@@ -65,15 +65,15 @@ class H3DRoIHead(Base3DRoIHead):
            feats_dict (dict): Contains features from the first stage.
            img_metas (list[dict]): Contain pcd and img's meta info.
            points (list[torch.Tensor]): Input points.
            gt_bboxes_3d (list[:obj:`BaseInstance3DBoxes`]): Ground truth
                bboxes of each sample.
            gt_labels_3d (list[torch.Tensor]): Labels of each sample.
            pts_semantic_mask (list[torch.Tensor]): Point-wise
                semantic mask.
            pts_instance_mask (list[torch.Tensor]): Point-wise
                instance mask.
            gt_bboxes_ignore (list[torch.Tensor]): Specify
                which bounding boxes to ignore.
        Returns:
            dict: losses from each head.