Unverified Commit 32a4328b authored by Wenwei Zhang, committed by GitHub

Bump version to V1.0.0rc0

parents 86cc487c a8817998
......@@ -2,10 +2,13 @@
from mmdet.models.losses import FocalLoss, SmoothL1Loss, binary_cross_entropy
from .axis_aligned_iou_loss import AxisAlignedIoULoss, axis_aligned_iou_loss
from .chamfer_distance import ChamferDistance, chamfer_distance
from .multibin_loss import MultiBinLoss
from .paconv_regularization_loss import PAConvRegularizationLoss
from .uncertain_smooth_l1_loss import UncertainL1Loss, UncertainSmoothL1Loss
__all__ = [
'FocalLoss', 'SmoothL1Loss', 'binary_cross_entropy', 'ChamferDistance',
'chamfer_distance', 'axis_aligned_iou_loss', 'AxisAlignedIoULoss',
'PAConvRegularizationLoss'
'PAConvRegularizationLoss', 'UncertainL1Loss', 'UncertainSmoothL1Loss',
'MultiBinLoss'
]
......@@ -54,7 +54,7 @@ class AxisAlignedIoULoss(nn.Module):
Args:
pred (torch.Tensor): Bbox predictions with shape [..., 3].
target (torch.Tensor): Bbox targets (gt) with shape [..., 3].
weight (torch.Tensor|float, optional): Weight of loss. \
weight (torch.Tensor | float, optional): Weight of loss.
Defaults to None.
avg_factor (int, optional): Average factor that is used to average
the loss. Defaults to None.
......
......@@ -29,13 +29,13 @@ def chamfer_distance(src,
Returns:
tuple: Source and Destination loss with the corresponding indices.
- loss_src (torch.Tensor): The min distance \
- loss_src (torch.Tensor): The min distance
from source to destination.
- loss_dst (torch.Tensor): The min distance \
- loss_dst (torch.Tensor): The min distance
from destination to source.
- indices1 (torch.Tensor): Index the min distance point \
- indices1 (torch.Tensor): Index the min distance point
for each point in source to destination.
- indices2 (torch.Tensor): Index the min distance point \
- indices2 (torch.Tensor): Index the min distance point
for each point in destination to source.
"""
......@@ -125,10 +125,10 @@ class ChamferDistance(nn.Module):
Defaults to False.
Returns:
tuple[torch.Tensor]: If ``return_indices=True``, return losses of \
source and target with their corresponding indices in the \
order of ``(loss_source, loss_target, indices1, indices2)``. \
If ``return_indices=False``, return \
tuple[torch.Tensor]: If ``return_indices=True``, return losses of
source and target with their corresponding indices in the
order of ``(loss_source, loss_target, indices1, indices2)``.
If ``return_indices=False``, return
``(loss_source, loss_target)``.
"""
assert reduction_override in (None, 'none', 'mean', 'sum')
......
# Copyright (c) OpenMMLab. All rights reserved.
import torch
from torch import nn as nn
from torch.nn import functional as F
from mmdet.models.builder import LOSSES
from mmdet.models.losses.utils import weighted_loss
@weighted_loss
def multibin_loss(pred_orientations, gt_orientations, num_dir_bins=4):
"""Multi-Bin Loss.
Args:
pred_orientations (torch.Tensor): Predicted local vector
orientation in [axis_cls, head_cls, sin, cos] format,
with shape (N, num_dir_bins * 4).
gt_orientations (torch.Tensor): Corresponding ground truth
orientations, with shape (N, num_dir_bins * 2).
num_dir_bins (int, optional): Number of bins to encode
the direction angle. Defaults to 4.
Returns:
torch.Tensor: Loss tensor.
"""
cls_losses = 0
reg_losses = 0
reg_cnt = 0
for i in range(num_dir_bins):
# bin cls loss
cls_ce_loss = F.cross_entropy(
pred_orientations[:, (i * 2):(i * 2 + 2)],
gt_orientations[:, i].long(),
reduction='mean')
# regression loss
valid_mask_i = (gt_orientations[:, i] == 1)
cls_losses += cls_ce_loss
if valid_mask_i.sum() > 0:
start = num_dir_bins * 2 + i * 2
end = start + 2
pred_offset = F.normalize(pred_orientations[valid_mask_i,
start:end])
gt_offset_sin = torch.sin(gt_orientations[valid_mask_i,
num_dir_bins + i])
gt_offset_cos = torch.cos(gt_orientations[valid_mask_i,
num_dir_bins + i])
reg_loss = \
F.l1_loss(pred_offset[:, 0], gt_offset_sin,
reduction='none') + \
F.l1_loss(pred_offset[:, 1], gt_offset_cos,
reduction='none')
reg_losses += reg_loss.sum()
reg_cnt += valid_mask_i.sum()
return cls_losses / num_dir_bins + reg_losses / reg_cnt
@LOSSES.register_module()
class MultiBinLoss(nn.Module):
"""Multi-Bin Loss for orientation.
Args:
reduction (str, optional): The method to reduce the loss.
Options are 'none', 'mean' and 'sum'. Defaults to 'none'.
loss_weight (float, optional): The weight of loss. Defaults
to 1.0.
"""
def __init__(self, reduction='none', loss_weight=1.0):
super(MultiBinLoss, self).__init__()
assert reduction in ['none', 'sum', 'mean']
self.reduction = reduction
self.loss_weight = loss_weight
def forward(self, pred, target, num_dir_bins, reduction_override=None):
"""Forward function.
Args:
pred (torch.Tensor): The prediction.
target (torch.Tensor): The learning target of the prediction.
num_dir_bins (int): Number of bins to encode direction angle.
reduction_override (str, optional): The reduction method used to
override the original reduction method of the loss.
Defaults to None.
"""
assert reduction_override in (None, 'none', 'mean', 'sum')
reduction = (
reduction_override if reduction_override else self.reduction)
loss = self.loss_weight * multibin_loss(
pred, target, num_dir_bins=num_dir_bins, reduction=reduction)
return loss
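A hedged usage sketch, with the layout inferred from the indexing in multibin_loss above: the first num_dir_bins * 2 channels of pred hold per-bin classification logits, the last num_dir_bins * 2 hold per-bin (sin, cos) offsets, while target packs per-bin 0/1 labels followed by per-bin offset angles. Values here are illustrative:

import torch

num_bins, n = 4, 8
pred = torch.rand(n, num_bins * 4)  # [cls_0 .. cls_3 | (sin, cos)_0 .. (sin, cos)_3]
gt_bins = torch.randint(0, 2, (n, num_bins)).float()  # per-bin 0/1 labels
gt_bins[:, 0] = 1  # keep at least one active bin so reg_cnt > 0
gt_angles = torch.rand(n, num_bins)  # per-bin offset angles
target = torch.cat([gt_bins, gt_angles], dim=1)  # (n, num_bins * 2)
loss = MultiBinLoss(reduction='none')(pred, target, num_dir_bins=num_bins)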
# Copyright (c) OpenMMLab. All rights reserved.
import torch
from torch import nn as nn
from mmdet.models.builder import LOSSES
from mmdet.models.losses.utils import weighted_loss
@weighted_loss
def uncertain_smooth_l1_loss(pred, target, sigma, alpha=1.0, beta=1.0):
"""Smooth L1 loss with uncertainty.
Args:
pred (torch.Tensor): The prediction.
target (torch.Tensor): The learning target of the prediction.
sigma (torch.Tensor): The sigma for uncertainty.
alpha (float, optional): The coefficient of log(sigma).
Defaults to 1.0.
beta (float, optional): The threshold in the piecewise function.
Defaults to 1.0.
Returns:
torch.Tensor: Calculated loss
"""
assert beta > 0
assert target.numel() > 0
assert pred.size() == target.size() == sigma.size(), 'The size of pred ' \
f'{pred.size()}, target {target.size()}, and sigma {sigma.size()} ' \
'are inconsistent.'
diff = torch.abs(pred - target)
loss = torch.where(diff < beta, 0.5 * diff * diff / beta,
diff - 0.5 * beta)
loss = torch.exp(-sigma) * loss + alpha * sigma
return loss
@weighted_loss
def uncertain_l1_loss(pred, target, sigma, alpha=1.0):
"""L1 loss with uncertainty.
Args:
pred (torch.Tensor): The prediction.
target (torch.Tensor): The learning target of the prediction.
sigma (torch.Tensor): The sigma for uncertainty.
alpha (float, optional): The coefficient of log(sigma).
Defaults to 1.0.
Returns:
torch.Tensor: Calculated loss
"""
assert target.numel() > 0
assert pred.size() == target.size() == sigma.size(), 'The size of pred ' \
f'{pred.size()}, target {target.size()}, and sigma {sigma.size()} ' \
'are inconsistent.'
loss = torch.abs(pred - target)
loss = torch.exp(-sigma) * loss + alpha * sigma
return loss
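A quick numeric sanity check of the weighting above (illustration only): with sigma = 0 the loss reduces to plain L1, while a larger sigma damps the residual by exp(-sigma) at the price of the alpha * sigma penalty:

import torch

pred = torch.tensor([1.0, 2.0])
target = torch.tensor([0.0, 0.0])
plain = uncertain_l1_loss(pred, target, sigma=torch.zeros(2), reduction='none')
# tensor([1., 2.]) == |pred - target|
damped = uncertain_l1_loss(pred, target, sigma=torch.ones(2), reduction='none')
# exp(-1) * |pred - target| + 1.0 -> tensor([1.3679, 1.7358])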
@LOSSES.register_module()
class UncertainSmoothL1Loss(nn.Module):
r"""Smooth L1 loss with uncertainty.
Please refer to `PGD <https://arxiv.org/abs/2107.14160>`_ and
`Multi-Task Learning Using Uncertainty to Weigh Losses for Scene Geometry
and Semantics <https://arxiv.org/abs/1705.07115>`_ for more details.
Args:
alpha (float, optional): The coefficient of log(sigma).
Defaults to 1.0.
beta (float, optional): The threshold in the piecewise function.
Defaults to 1.0.
reduction (str, optional): The method to reduce the loss.
Options are 'none', 'mean' and 'sum'. Defaults to 'mean'.
loss_weight (float, optional): The weight of loss. Defaults to 1.0.
"""
def __init__(self, alpha=1.0, beta=1.0, reduction='mean', loss_weight=1.0):
super(UncertainSmoothL1Loss, self).__init__()
assert reduction in ['none', 'sum', 'mean']
self.alpha = alpha
self.beta = beta
self.reduction = reduction
self.loss_weight = loss_weight
def forward(self,
pred,
target,
sigma,
weight=None,
avg_factor=None,
reduction_override=None,
**kwargs):
"""Forward function.
Args:
pred (torch.Tensor): The prediction.
target (torch.Tensor): The learning target of the prediction.
sigma (torch.Tensor): The sigma for uncertainty.
weight (torch.Tensor, optional): The weight of loss for each
prediction. Defaults to None.
avg_factor (int, optional): Average factor that is used to average
the loss. Defaults to None.
reduction_override (str, optional): The reduction method used to
override the original reduction method of the loss.
Defaults to None.
"""
assert reduction_override in (None, 'none', 'mean', 'sum')
reduction = (
reduction_override if reduction_override else self.reduction)
loss_bbox = self.loss_weight * uncertain_smooth_l1_loss(
pred,
target,
weight,
sigma=sigma,
alpha=self.alpha,
beta=self.beta,
reduction=reduction,
avg_factor=avg_factor,
**kwargs)
return loss_bbox
@LOSSES.register_module()
class UncertainL1Loss(nn.Module):
"""L1 loss with uncertainty.
Args:
alpha (float, optional): The coefficient of log(sigma).
Defaults to 1.0.
reduction (str, optional): The method to reduce the loss.
Options are 'none', 'mean' and 'sum'. Defaults to 'mean'.
loss_weight (float, optional): The weight of loss. Defaults to 1.0.
"""
def __init__(self, alpha=1.0, reduction='mean', loss_weight=1.0):
super(UncertainL1Loss, self).__init__()
assert reduction in ['none', 'sum', 'mean']
self.alpha = alpha
self.reduction = reduction
self.loss_weight = loss_weight
def forward(self,
pred,
target,
sigma,
weight=None,
avg_factor=None,
reduction_override=None):
"""Forward function.
Args:
pred (torch.Tensor): The prediction.
target (torch.Tensor): The learning target of the prediction.
sigma (torch.Tensor): The sigma for uncertainty.
weight (torch.Tensor, optional): The weight of loss for each
prediction. Defaults to None.
avg_factor (int, optional): Average factor that is used to average
the loss. Defaults to None.
reduction_override (str, optional): The reduction method used to
override the original reduction method of the loss.
Defaults to None.
"""
assert reduction_override in (None, 'none', 'mean', 'sum')
reduction = (
reduction_override if reduction_override else self.reduction)
loss_bbox = self.loss_weight * uncertain_l1_loss(
pred,
target,
weight,
sigma=sigma,
alpha=self.alpha,
reduction=reduction,
avg_factor=avg_factor)
return loss_bbox
......@@ -14,19 +14,21 @@ class SparseEncoder(nn.Module):
Args:
in_channels (int): The number of input channels.
sparse_shape (list[int]): The sparse shape of input tensor.
order (list[str]): Order of conv module. Defaults to ('conv',
'norm', 'act').
norm_cfg (dict): Config of normalization layer. Defaults to
order (list[str], optional): Order of conv module.
Defaults to ('conv', 'norm', 'act').
norm_cfg (dict, optional): Config of normalization layer. Defaults to
dict(type='BN1d', eps=1e-3, momentum=0.01).
base_channels (int): Out channels for conv_input layer.
base_channels (int, optional): Out channels for conv_input layer.
Defaults to 16.
output_channels (int): Out channels for conv_out layer.
output_channels (int, optional): Out channels for conv_out layer.
Defaults to 128.
encoder_channels (tuple[tuple[int]]):
encoder_channels (tuple[tuple[int]], optional):
Convolutional channels of each encode block.
encoder_paddings (tuple[tuple[int]]): Paddings of each encode block.
encoder_paddings (tuple[tuple[int]], optional):
Paddings of each encode block.
Defaults to ((16, ), (32, 32, 32), (64, 64, 64), (64, 64, 64)).
block_type (str): Type of the block to use. Defaults to 'conv_module'.
block_type (str, optional): Type of the block to use.
Defaults to 'conv_module'.
"""
def __init__(self,
......@@ -99,7 +101,7 @@ class SparseEncoder(nn.Module):
Args:
voxel_features (torch.float32): Voxel features in shape (N, C).
coors (torch.int32): Coordinates in shape (N, 4), \
coors (torch.int32): Coordinates in shape (N, 4),
the columns in the order of (batch_idx, z_idx, y_idx, x_idx).
batch_size (int): Batch size.
......@@ -139,9 +141,9 @@ class SparseEncoder(nn.Module):
make_block (method): A bounded function to build blocks.
norm_cfg (dict[str]): Config of normalization layer.
in_channels (int): The number of encoder input channels.
block_type (str): Type of the block to use. Defaults to
'conv_module'.
conv_cfg (dict): Config of conv layer. Defaults to
block_type (str, optional): Type of the block to use.
Defaults to 'conv_module'.
conv_cfg (dict, optional): Config of conv layer. Defaults to
dict(type='SubMConv3d').
Returns:
......
# Copyright (c) OpenMMLab. All rights reserved.
from .edge_fusion_module import EdgeFusionModule
from .transformer import GroupFree3DMHA
from .vote_module import VoteModule
__all__ = ['VoteModule', 'GroupFree3DMHA']
__all__ = ['VoteModule', 'GroupFree3DMHA', 'EdgeFusionModule']
# Copyright (c) OpenMMLab. All rights reserved.
from mmcv.cnn import ConvModule
from mmcv.runner import BaseModule
from torch import nn as nn
from torch.nn import functional as F
class EdgeFusionModule(BaseModule):
"""Edge Fusion Module for feature map.
Args:
out_channels (int): The number of output channels.
feat_channels (int): The number of channels in feature map
during edge feature fusion.
kernel_size (int, optional): Kernel size of convolution.
Default: 3.
act_cfg (dict, optional): Config of activation.
Default: dict(type='ReLU').
norm_cfg (dict, optional): Config of normalization.
Default: dict(type='BN1d').
"""
def __init__(self,
out_channels,
feat_channels,
kernel_size=3,
act_cfg=dict(type='ReLU'),
norm_cfg=dict(type='BN1d')):
super().__init__()
self.edge_convs = nn.Sequential(
ConvModule(
feat_channels,
feat_channels,
kernel_size=kernel_size,
padding=kernel_size // 2,
conv_cfg=dict(type='Conv1d'),
norm_cfg=norm_cfg,
act_cfg=act_cfg),
nn.Conv1d(feat_channels, out_channels, kernel_size=1))
self.feat_channels = feat_channels
def forward(self, features, fused_features, edge_indices, edge_lens,
output_h, output_w):
"""Forward pass.
Args:
features (torch.Tensor): Different representative features
for fusion.
fused_features (torch.Tensor): Different representative
features to be fused.
edge_indices (torch.Tensor): Batch image edge indices.
edge_lens (list[int]): List of edge length of each image.
output_h (int): Height of output feature map.
output_w (int): Width of output feature map.
Returns:
torch.Tensor: Fused feature maps.
"""
batch_size = features.shape[0]
# normalize
grid_edge_indices = edge_indices.view(batch_size, -1, 1, 2).float()
grid_edge_indices[..., 0] = \
grid_edge_indices[..., 0] / (output_w - 1) * 2 - 1
grid_edge_indices[..., 1] = \
grid_edge_indices[..., 1] / (output_h - 1) * 2 - 1
# apply edge fusion
edge_features = F.grid_sample(
features, grid_edge_indices, align_corners=True).squeeze(-1)
edge_output = self.edge_convs(edge_features)
for k in range(batch_size):
edge_indice_k = edge_indices[k, :edge_lens[k]]
fused_features[k, :, edge_indice_k[:, 1],
edge_indice_k[:, 0]] += edge_output[
k, :, :edge_lens[k]]
return fused_features
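A hedged illustration of the normalization step above: integer edge indices in [0, W - 1] x [0, H - 1] are mapped into [-1, 1], the coordinate convention F.grid_sample expects with align_corners=True:

import torch

output_w = 8
xs = torch.tensor([0., (output_w - 1) / 2, output_w - 1.])
print(xs / (output_w - 1) * 2 - 1)  # tensor([-1., 0., 1.])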
......@@ -15,15 +15,16 @@ class GroupFree3DMHA(MultiheadAttention):
embed_dims (int): The embedding dimension.
num_heads (int): Parallel attention heads. Same as
`nn.MultiheadAttention`.
attn_drop (float): A Dropout layer on attn_output_weights. Default 0.0.
proj_drop (float): A Dropout layer. Default 0.0.
dropout_layer (obj:`ConfigDict`): The dropout_layer used
attn_drop (float, optional): A Dropout layer on attn_output_weights.
Defaults to 0.0.
proj_drop (float, optional): A Dropout layer. Defaults to 0.0.
dropout_layer (obj:`ConfigDict`, optional): The dropout_layer used
when adding the shortcut.
init_cfg (obj:`mmcv.ConfigDict`): The Config for initialization.
Default: None.
batch_first (bool): Key, Query and Value are shape of
init_cfg (obj:`mmcv.ConfigDict`, optional): The Config for
initialization. Default: None.
batch_first (bool, optional): Key, Query and Value are shape of
(batch, n, embed_dim)
or (n, batch, embed_dim). Default to False.
or (n, batch, embed_dim). Defaults to False.
"""
def __init__(self,
......@@ -58,26 +59,26 @@ class GroupFree3DMHA(MultiheadAttention):
embed_dims]. Same in `nn.MultiheadAttention.forward`.
key (Tensor): The key tensor with shape [num_keys, bs,
embed_dims]. Same in `nn.MultiheadAttention.forward`.
If None, the ``query`` will be used. Defaults to None.
If None, the ``query`` will be used.
value (Tensor): The value tensor with same shape as `key`.
Same in `nn.MultiheadAttention.forward`. Defaults to None.
Same in `nn.MultiheadAttention.forward`.
If None, the `key` will be used.
identity (Tensor): This tensor, with the same shape as x,
will be used for the identity link.
If None, `x` will be used. Defaults to None.
query_pos (Tensor): The positional encoding for query, with
the same shape as `x`. If not None, it will
be added to `x` before forward function. Defaults to None.
key_pos (Tensor): The positional encoding for `key`, with the
same shape as `key`. Defaults to None. If not None, it will
be added to `key` before forward function. If None, and
`query_pos` has the same shape as `key`, then `query_pos`
will be used for the identity link. If None, `x` will be used.
query_pos (Tensor, optional): The positional encoding for query,
with the same shape as `x`. Defaults to None.
If not None, it will be added to `x` before forward function.
key_pos (Tensor, optional): The positional encoding for `key`,
with the same shape as `key`. Defaults to None. If not None,
it will be added to `key` before forward function. If None,
and `query_pos` has the same shape as `key`, then `query_pos`
will be used for `key_pos`. Defaults to None.
attn_mask (Tensor): ByteTensor mask with shape [num_queries,
num_keys]. Same in `nn.MultiheadAttention.forward`.
Defaults to None.
key_padding_mask (Tensor): ByteTensor with shape [bs, num_keys].
attn_mask (Tensor, optional): ByteTensor mask with shape
[num_queries, num_keys].
Same in `nn.MultiheadAttention.forward`. Defaults to None.
key_padding_mask (Tensor, optional): ByteTensor with shape
[bs, num_keys]. Same in `nn.MultiheadAttention.forward`.
Defaults to None.
Returns:
Tensor: forwarded results with shape [num_queries, bs, embed_dims].
......@@ -113,7 +114,7 @@ class ConvBNPositionalEncoding(nn.Module):
Args:
input_channel (int): input features dim.
num_pos_feats (int): output position features dim.
num_pos_feats (int, optional): output position features dim.
Defaults to 288 to be consistent with seed features dim.
"""
......@@ -131,7 +132,7 @@ class ConvBNPositionalEncoding(nn.Module):
xyz (Tensor): (B, N, 3) the coordinates to embed.
Returns:
Tensor: (B, num_pos_feats, N) the embeded position features.
Tensor: (B, num_pos_feats, N) the embedded position features.
"""
xyz = xyz.permute(0, 2, 1)
position_embedding = self.position_embedding_head(xyz)
......
......@@ -14,22 +14,25 @@ class VoteModule(nn.Module):
Args:
in_channels (int): Number of channels of seed point features.
vote_per_seed (int): Number of votes generated from each seed point.
gt_per_seed (int): Number of ground truth votes generated
from each seed point.
num_points (int): Number of points to be used for voting.
conv_channels (tuple[int]): Out channels of vote
generating convolution.
conv_cfg (dict): Config of convolution.
vote_per_seed (int, optional): Number of votes generated from
each seed point. Default: 1.
gt_per_seed (int, optional): Number of ground truth votes generated
from each seed point. Default: 3.
num_points (int, optional): Number of points to be used for voting.
Default: 1.
conv_channels (tuple[int], optional): Out channels of vote
generating convolution. Default: (16, 16).
conv_cfg (dict, optional): Config of convolution.
Default: dict(type='Conv1d').
norm_cfg (dict): Config of normalization.
norm_cfg (dict, optional): Config of normalization.
Default: dict(type='BN1d').
norm_feats (bool): Whether to normalize features.
norm_feats (bool, optional): Whether to normalize features.
Default: True.
with_res_feat (bool): Whether to predict residual features.
with_res_feat (bool, optional): Whether to predict residual features.
Default: True.
vote_xyz_range (list[float], None): The range of points translation.
vote_loss (dict): Config of vote loss.
vote_xyz_range (list[float], optional):
The range of points translation. Default: None.
vote_loss (dict, optional): Config of vote loss. Default: None.
"""
def __init__(self,
......@@ -95,10 +98,10 @@ class VoteModule(nn.Module):
Returns:
tuple[torch.Tensor]:
- vote_points: Voted xyz based on the seed points \
- vote_points: Voted xyz based on the seed points
with shape (B, M, 3), ``M=num_seed*vote_per_seed``.
- vote_features: Voted features based on the seed points with \
shape (B, C, M) where ``M=num_seed*vote_per_seed``, \
- vote_features: Voted features based on the seed points with
shape (B, C, M) where ``M=num_seed*vote_per_seed``,
``C=vote_feature_dim``.
"""
if self.num_points != -1:
......
# Copyright (c) OpenMMLab. All rights reserved.
from mmdet.models.necks.fpn import FPN
from .dla_neck import DLANeck
from .imvoxel_neck import OutdoorImVoxelNeck
from .pointnet2_fp_neck import PointNetFPNeck
from .second_fpn import SECONDFPN
__all__ = ['FPN', 'SECONDFPN', 'OutdoorImVoxelNeck']
__all__ = [
'FPN', 'SECONDFPN', 'OutdoorImVoxelNeck', 'PointNetFPNeck', 'DLANeck'
]
# Copyright (c) OpenMMLab. All rights reserved.
import math
import numpy as np
from mmcv.cnn import ConvModule, build_conv_layer
from mmcv.runner import BaseModule
from torch import nn as nn
from mmdet.models.builder import NECKS
def fill_up_weights(up):
"""Simulated bilinear upsampling kernel.
Args:
up (nn.Module): ConvTranspose2d module.
"""
w = up.weight.data
f = math.ceil(w.size(2) / 2)
c = (2 * f - 1 - f % 2) / (2. * f)
for i in range(w.size(2)):
for j in range(w.size(3)):
w[0, 0, i, j] = \
(1 - math.fabs(i / f - c)) * (1 - math.fabs(j / f - c))
for c in range(1, w.size(0)):
w[c, 0, :, :] = w[0, 0, :, :]
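A hedged sanity check for fill_up_weights, assuming the grouped ConvTranspose2d configuration used in this file (kernel size 2 * stride, padding stride // 2): after filling, the deconvolution behaves like bilinear upsampling, matching F.interpolate in the interior while borders differ due to zero padding:

import torch
from torch import nn
from torch.nn import functional as F

up = nn.ConvTranspose2d(3, 3, kernel_size=4, stride=2, padding=1, groups=3, bias=False)
fill_up_weights(up)
x = torch.rand(1, 3, 8, 8)
y_deconv = up(x)  # (1, 3, 16, 16)
y_interp = F.interpolate(x, scale_factor=2, mode='bilinear', align_corners=False)
# interior values of y_deconv and y_interp agree closely; border rows/columns differ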
class IDAUpsample(BaseModule):
"""Iterative Deep Aggregation (IDA) Upsampling module to upsample features
of different scales to a similar scale.
Args:
out_channels (int): Number of output channels for DeformConv.
in_channels (List[int]): List of input channels of multi-scale
feature maps.
kernel_sizes (List[int]): List of sizes of the convolving
kernels at different scales.
norm_cfg (dict, optional): Config dict for normalization layer.
Default: None.
use_dcn (bool, optional): If True, use DCNv2. Default: True.
"""
def __init__(
self,
out_channels,
in_channels,
kernel_sizes,
norm_cfg=None,
use_dcn=True,
init_cfg=None,
):
super(IDAUpsample, self).__init__(init_cfg)
self.use_dcn = use_dcn
self.projs = nn.ModuleList()
self.ups = nn.ModuleList()
self.nodes = nn.ModuleList()
for i in range(1, len(in_channels)):
in_channel = in_channels[i]
up_kernel_size = int(kernel_sizes[i])
proj = ConvModule(
in_channel,
out_channels,
3,
padding=1,
bias=True,
conv_cfg=dict(type='DCNv2') if self.use_dcn else None,
norm_cfg=norm_cfg)
node = ConvModule(
out_channels,
out_channels,
3,
padding=1,
bias=True,
conv_cfg=dict(type='DCNv2') if self.use_dcn else None,
norm_cfg=norm_cfg)
up = build_conv_layer(
dict(type='deconv'),
out_channels,
out_channels,
up_kernel_size * 2,
stride=up_kernel_size,
padding=up_kernel_size // 2,
output_padding=0,
groups=out_channels,
bias=False)
self.projs.append(proj)
self.ups.append(up)
self.nodes.append(node)
def forward(self, mlvl_features, start_level, end_level):
"""Forward function.
Args:
mlvl_features (list[torch.Tensor]): Features from multiple layers.
start_level (int): Start layer for feature upsampling.
end_level (int): End layer for feature upsampling.
"""
for i in range(start_level, end_level - 1):
upsample = self.ups[i - start_level]
project = self.projs[i - start_level]
mlvl_features[i + 1] = upsample(project(mlvl_features[i + 1]))
node = self.nodes[i - start_level]
mlvl_features[i + 1] = node(mlvl_features[i + 1] +
mlvl_features[i])
class DLAUpsample(BaseModule):
"""Deep Layer Aggregation (DLA) Upsampling module for different scales
feature extraction, upsampling and fusion, It consists of groups of
IDAupsample modules.
Args:
start_level (int): The start layer.
channels (List[int]): List of input channels of multi-scale
feature maps.
scales (List[int]): List of scales of different layers' features.
in_channels (List[int], optional): List of input channels of
different scales. Default: None.
norm_cfg (dict, optional): Config dict for normalization layer.
Default: None.
use_dcn (bool, optional): Whether to use dcn in IDAup module.
Default: True.
"""
def __init__(self,
start_level,
channels,
scales,
in_channels=None,
norm_cfg=None,
use_dcn=True,
init_cfg=None):
super(DLAUpsample, self).__init__(init_cfg)
self.start_level = start_level
if in_channels is None:
in_channels = channels
self.channels = channels
channels = list(channels)
scales = np.array(scales, dtype=int)
for i in range(len(channels) - 1):
j = -i - 2
setattr(
self, 'ida_{}'.format(i),
IDAUpsample(channels[j], in_channels[j:],
scales[j:] // scales[j], norm_cfg, use_dcn))
scales[j + 1:] = scales[j]
in_channels[j + 1:] = [channels[j] for _ in channels[j + 1:]]
def forward(self, mlvl_features):
"""Forward function.
Args:
mlvl_features (list[torch.Tensor]): Features from multi-scale
layers.
Returns:
tuple[torch.Tensor]: Up-sampled features of different layers.
"""
outs = [mlvl_features[-1]]
for i in range(len(mlvl_features) - self.start_level - 1):
ida = getattr(self, 'ida_{}'.format(i))
ida(mlvl_features, len(mlvl_features) - i - 2, len(mlvl_features))
outs.insert(0, mlvl_features[-1])
return outs
@NECKS.register_module()
class DLANeck(BaseModule):
"""DLA Neck.
Args:
in_channels (list[int], optional): List of input channels
of multi-scale feature map.
start_level (int, optional): The scale level where upsampling
starts. Default: 2.
end_level (int, optional): The scale level where upsampling
ends. Default: 5.
norm_cfg (dict, optional): Config dict for normalization
layer. Default: None.
use_dcn (bool, optional): Whether to use dcn in IDAup module.
Default: True.
"""
def __init__(self,
in_channels=[16, 32, 64, 128, 256, 512],
start_level=2,
end_level=5,
norm_cfg=None,
use_dcn=True,
init_cfg=None):
super(DLANeck, self).__init__(init_cfg)
self.start_level = start_level
self.end_level = end_level
scales = [2**i for i in range(len(in_channels[self.start_level:]))]
self.dla_up = DLAUpsample(
start_level=self.start_level,
channels=in_channels[self.start_level:],
scales=scales,
norm_cfg=norm_cfg,
use_dcn=use_dcn)
self.ida_up = IDAUpsample(
in_channels[self.start_level],
in_channels[self.start_level:self.end_level],
[2**i for i in range(self.end_level - self.start_level)], norm_cfg,
use_dcn)
def forward(self, x):
mlvl_features = [x[i] for i in range(len(x))]
mlvl_features = self.dla_up(mlvl_features)
outs = []
for i in range(self.end_level - self.start_level):
outs.append(mlvl_features[i].clone())
self.ida_up(outs, 0, len(outs))
return [outs[-1]]
def init_weights(self):
for m in self.modules():
if isinstance(m, nn.ConvTranspose2d):
# In order to be consistent with the source code,
# reset the ConvTranspose2d initialization parameters
m.reset_parameters()
# Simulated bilinear upsampling kernel
fill_up_weights(m)
elif isinstance(m, (nn.BatchNorm2d, nn.GroupNorm)):
nn.init.constant_(m.weight, 1)
nn.init.constant_(m.bias, 0)
elif isinstance(m, nn.Conv2d):
# In order to be consistent with the source code,
# reset the Conv2d initialization parameters
m.reset_parameters()
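A hedged shape-check sketch for DLANeck (illustrative sizes; use_dcn=False so plain convolutions are used and no compiled deformable ops are required, assuming mmcv is available):

import torch

neck = DLANeck(
    in_channels=[16, 32, 64, 128, 256, 512],
    start_level=2,
    end_level=5,
    use_dcn=False)
neck.init_weights()
feats = [
    torch.rand(2, c, 128 // 2**i, 128 // 2**i)
    for i, c in enumerate([16, 32, 64, 128, 256, 512])
]
out = neck(feats)
# expected: a single-element list holding a (2, 64, 32, 32) tensor,
# i.e. in_channels[start_level] channels at the start_level resolution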
# Copyright (c) OpenMMLab. All rights reserved.
from mmcv.runner import BaseModule
from torch import nn as nn
from mmdet3d.ops import PointFPModule
from mmdet.models import NECKS
@NECKS.register_module()
class PointNetFPNeck(BaseModule):
r"""PointNet FP Module used in PointRCNN.
Refer to the `official code <https://github.com/charlesq34/pointnet2>`_.
.. code-block:: none
sa_n ----------------------------------------
|
... --------------------------------- |
| |
sa_1 ------------- | |
| | |
sa_0 -> fp_0 -> fp_module -> fp_1 -> ... -> fp_module -> fp_n
sa_n including sa_xyz (torch.Tensor) and sa_features (torch.Tensor)
fp_n including fp_xyz (torch.Tensor) and fp_features (torch.Tensor)
Args:
fp_channels (tuple[tuple[int]]): Tuple of mlp channels in FP modules.
init_cfg (dict or list[dict], optional): Initialization config dict.
Default: None
"""
def __init__(self, fp_channels, init_cfg=None):
super(PointNetFPNeck, self).__init__(init_cfg=init_cfg)
self.num_fp = len(fp_channels)
self.FP_modules = nn.ModuleList()
for cur_fp_mlps in fp_channels:
self.FP_modules.append(PointFPModule(mlp_channels=cur_fp_mlps))
def _extract_input(self, feat_dict):
"""Extract inputs from features dictionary.
Args:
feat_dict (dict): Feature dict from backbone, which may contain
the following keys and values:
- sa_xyz (list[torch.Tensor]): Points of each sa module
in shape (N, 3).
- sa_features (list[torch.Tensor]): Output features of
each sa module in shape (N, M).
Returns:
list[torch.Tensor]: Coordinates of multiple levels of points.
list[torch.Tensor]: Features of multiple levels of points.
"""
sa_xyz = feat_dict['sa_xyz']
sa_features = feat_dict['sa_features']
assert len(sa_xyz) == len(sa_features)
return sa_xyz, sa_features
def forward(self, feat_dict):
"""Forward pass.
Args:
feat_dict (dict): Feature dict from backbone.
Returns:
dict[str, torch.Tensor]: Outputs of the Neck.
- fp_xyz (torch.Tensor): The coordinates of fp features.
- fp_features (torch.Tensor): The features from the last
feature propagation layers.
"""
sa_xyz, sa_features = self._extract_input(feat_dict)
fp_feature = sa_features[-1]
fp_xyz = sa_xyz[-1]
for i in range(self.num_fp):
# consume the points in a bottom-up manner
fp_feature = self.FP_modules[i](sa_xyz[-(i + 2)], sa_xyz[-(i + 1)],
sa_features[-(i + 2)], fp_feature)
fp_xyz = sa_xyz[-(i + 2)]
ret = dict(fp_xyz=fp_xyz, fp_features=fp_feature)
return ret
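A hedged usage sketch for PointNetFPNeck with illustrative channel and point counts. The first entry of each fp_channels tuple is assumed to be the concatenated width of the skip features and the propagated features; PointFPModule relies on compiled CUDA ops, so tensors are moved to the GPU:

import torch

neck = PointNetFPNeck(fp_channels=((256 + 128, 256), (256 + 64, 128))).cuda()
feat_dict = dict(
    sa_xyz=[torch.rand(2, n, 3).cuda() for n in (1024, 256, 64)],
    sa_features=[
        torch.rand(2, c, n).cuda()
        for c, n in ((64, 1024), (128, 256), (256, 64))
    ])
out = neck(feat_dict)
# expected: out['fp_xyz'] of shape (2, 1024, 3),
# out['fp_features'] of shape (2, 128, 1024)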
......@@ -4,10 +4,11 @@ from .bbox_heads import PartA2BboxHead
from .h3d_roi_head import H3DRoIHead
from .mask_heads import PointwiseSemanticHead, PrimitiveHead
from .part_aggregation_roi_head import PartAggregationROIHead
from .point_rcnn_roi_head import PointRCNNRoIHead
from .roi_extractors import Single3DRoIAwareExtractor, SingleRoIExtractor
__all__ = [
'Base3DRoIHead', 'PartAggregationROIHead', 'PointwiseSemanticHead',
'Single3DRoIAwareExtractor', 'PartA2BboxHead', 'SingleRoIExtractor',
'H3DRoIHead', 'PrimitiveHead'
'H3DRoIHead', 'PrimitiveHead', 'PointRCNNRoIHead'
]
# Copyright (c) OpenMMLab. All rights reserved.
from abc import ABCMeta, abstractmethod
from mmcv.runner import BaseModule
......
......@@ -5,9 +5,10 @@ from mmdet.models.roi_heads.bbox_heads import (BBoxHead, ConvFCBBoxHead,
Shared4Conv1FCBBoxHead)
from .h3d_bbox_head import H3DBboxHead
from .parta2_bbox_head import PartA2BboxHead
from .point_rcnn_bbox_head import PointRCNNBboxHead
__all__ = [
'BBoxHead', 'ConvFCBBoxHead', 'Shared2FCBBoxHead',
'Shared4Conv1FCBBoxHead', 'DoubleConvFCBBoxHead', 'PartA2BboxHead',
'H3DBboxHead'
'H3DBboxHead', 'PointRCNNBboxHead'
]
......@@ -20,7 +20,7 @@ class H3DBboxHead(BaseModule):
Args:
num_classes (int): The number of classes.
suface_matching_cfg (dict): Config for suface primitive matching.
surface_matching_cfg (dict): Config for surface primitive matching.
line_matching_cfg (dict): Config for line primitive matching.
bbox_coder (:obj:`BaseBBoxCoder`): Bbox coder for encoding and
decoding boxes.
......@@ -36,7 +36,7 @@ class H3DBboxHead(BaseModule):
primitive_refine_channels (tuple[int]): Convolution channels of
prediction layer.
upper_thresh (float): Threshold for line matching.
surface_thresh (float): Threshold for suface matching.
surface_thresh (float): Threshold for surface matching.
line_thresh (float): Threshold for line matching.
conv_cfg (dict): Config of convolution in prediction layer.
norm_cfg (dict): Config of BN in prediction layer.
......@@ -324,16 +324,16 @@ class H3DBboxHead(BaseModule):
Args:
bbox_preds (dict): Predictions from forward of h3d bbox head.
points (list[torch.Tensor]): Input points.
gt_bboxes_3d (list[:obj:`BaseInstance3DBoxes`]): Ground truth \
gt_bboxes_3d (list[:obj:`BaseInstance3DBoxes`]): Ground truth
bboxes of each sample.
gt_labels_3d (list[torch.Tensor]): Labels of each sample.
pts_semantic_mask (None | list[torch.Tensor]): Point-wise
pts_semantic_mask (list[torch.Tensor]): Point-wise
semantic mask.
pts_instance_mask (None | list[torch.Tensor]): Point-wise
pts_instance_mask (list[torch.Tensor]): Point-wise
instance mask.
img_metas (list[dict]): Contain pcd and img's meta info.
rpn_targets (Tuple) : Targets generated by rpn head.
gt_bboxes_ignore (None | list[torch.Tensor]): Specify
gt_bboxes_ignore (list[torch.Tensor]): Specify
which bounding boxes to ignore.
Returns:
......@@ -502,7 +502,7 @@ class H3DBboxHead(BaseModule):
box_dim=bbox.shape[-1],
with_yaw=self.bbox_coder.with_rot,
origin=(0.5, 0.5, 0.5))
box_indices = bbox.points_in_boxes(points)
box_indices = bbox.points_in_boxes_all(points)
corner3d = bbox.corners
minmax_box3d = corner3d.new(torch.Size((corner3d.shape[0], 6)))
......@@ -560,25 +560,25 @@ class H3DBboxHead(BaseModule):
Args:
bbox_preds (dict): Predictions from forward of vote head.
size_class_targets (torch.Tensor): Ground truth \
size_class_targets (torch.Tensor): Ground truth
size class of each prediction bounding box.
size_res_targets (torch.Tensor): Ground truth \
size_res_targets (torch.Tensor): Ground truth
size residual of each prediction bounding box.
dir_class_targets (torch.Tensor): Ground truth \
dir_class_targets (torch.Tensor): Ground truth
direction class of each prediction bounding box.
dir_res_targets (torch.Tensor): Ground truth \
dir_res_targets (torch.Tensor): Ground truth
direction residual of each prediction bounding box.
center_targets (torch.Tensor): Ground truth center \
center_targets (torch.Tensor): Ground truth center
of each prediction bounding box.
mask_targets (torch.Tensor): Validation of each \
mask_targets (torch.Tensor): Validation of each
prediction bounding box.
objectness_targets (torch.Tensor): Ground truth \
objectness_targets (torch.Tensor): Ground truth
objectness label of each prediction bounding box.
objectness_weights (torch.Tensor): Weights of objectness \
objectness_weights (torch.Tensor): Weights of objectness
loss for each prediction bounding box.
box_loss_weights (torch.Tensor): Weights of regression \
box_loss_weights (torch.Tensor): Weights of regression
loss for each prediction bounding box.
valid_gt_weights (torch.Tensor): Validation of each \
valid_gt_weights (torch.Tensor): Validation of each
ground truth bounding box.
Returns:
......@@ -663,12 +663,12 @@ class H3DBboxHead(BaseModule):
Args:
points (list[torch.Tensor]): Points of each batch.
gt_bboxes_3d (list[:obj:`BaseInstance3DBoxes`]): Ground truth \
gt_bboxes_3d (list[:obj:`BaseInstance3DBoxes`]): Ground truth
bboxes of each batch.
gt_labels_3d (list[torch.Tensor]): Labels of each batch.
pts_semantic_mask (None | list[torch.Tensor]): Point-wise semantic
pts_semantic_mask (list[torch.Tensor]): Point-wise semantic
label of each batch.
pts_instance_mask (None | list[torch.Tensor]): Point-wise instance
pts_instance_mask (list[torch.Tensor]): Point-wise instance
label of each batch.
bbox_preds (torch.Tensor): Bounding box predictions of vote head.
......@@ -769,22 +769,22 @@ class H3DBboxHead(BaseModule):
Args:
points (torch.Tensor): Points of each batch.
gt_bboxes_3d (:obj:`BaseInstance3DBoxes`): Ground truth \
gt_bboxes_3d (:obj:`BaseInstance3DBoxes`): Ground truth
boxes of each batch.
gt_labels_3d (torch.Tensor): Labels of each batch.
pts_semantic_mask (None | torch.Tensor): Point-wise semantic
pts_semantic_mask (torch.Tensor): Point-wise semantic
label of each batch.
pts_instance_mask (None | torch.Tensor): Point-wise instance
pts_instance_mask (torch.Tensor): Point-wise instance
label of each batch.
aggregated_points (torch.Tensor): Aggregated points from
vote aggregation layer.
pred_surface_center (torch.Tensor): Prediction of surface center.
pred_line_center (torch.Tensor): Prediction of line center.
pred_obj_surface_center (torch.Tensor): Objectness prediction \
pred_obj_surface_center (torch.Tensor): Objectness prediction
of surface center.
pred_obj_line_center (torch.Tensor): Objectness prediction of \
pred_obj_line_center (torch.Tensor): Objectness prediction of
line center.
pred_surface_sem (torch.Tensor): Semantic prediction of \
pred_surface_sem (torch.Tensor): Semantic prediction of
surface center.
pred_line_sem (torch.Tensor): Semantic prediction of line center.
Returns:
......
......@@ -285,7 +285,7 @@ class PartA2BboxHead(BaseModule):
def loss(self, cls_score, bbox_pred, rois, labels, bbox_targets,
pos_gt_bboxes, reg_mask, label_weights, bbox_weights):
"""Coumputing losses.
"""Computing losses.
Args:
cls_score (torch.Tensor): Scores of each roi.
......@@ -344,7 +344,7 @@ class PartA2BboxHead(BaseModule):
pred_boxes3d[..., 0:3] = rotation_3d_in_axis(
pred_boxes3d[..., 0:3].unsqueeze(1),
(pos_rois_rotation + np.pi / 2),
pos_rois_rotation,
axis=2).squeeze(1)
pred_boxes3d[:, 0:3] += roi_xyz
......@@ -436,8 +436,7 @@ class PartA2BboxHead(BaseModule):
pos_gt_bboxes_ct[..., 0:3] -= roi_center
pos_gt_bboxes_ct[..., 6] -= roi_ry
pos_gt_bboxes_ct[..., 0:3] = rotation_3d_in_axis(
pos_gt_bboxes_ct[..., 0:3].unsqueeze(1),
-(roi_ry + np.pi / 2),
pos_gt_bboxes_ct[..., 0:3].unsqueeze(1), -roi_ry,
axis=2).squeeze(1)
# flip orientation if rois have opposite orientation
......@@ -462,12 +461,13 @@ class PartA2BboxHead(BaseModule):
return (label, bbox_targets, pos_gt_bboxes, reg_mask, label_weights,
bbox_weights)
def get_corner_loss_lidar(self, pred_bbox3d, gt_bbox3d, delta=1):
def get_corner_loss_lidar(self, pred_bbox3d, gt_bbox3d, delta=1.0):
"""Calculate corner loss of given boxes.
Args:
pred_bbox3d (torch.FloatTensor): Predicted boxes in shape (N, 7).
gt_bbox3d (torch.FloatTensor): Ground truth boxes in shape (N, 7).
delta (float, optional): Huber loss threshold. Defaults to 1.0.
Returns:
torch.FloatTensor: Calculated corner loss in shape (N).
......@@ -490,8 +490,8 @@ class PartA2BboxHead(BaseModule):
torch.norm(pred_box_corners - gt_box_corners_flip,
dim=2)) # (N, 8)
# huber loss
abs_error = torch.abs(corner_dist)
quadratic = torch.clamp(abs_error, max=delta)
abs_error = corner_dist.abs()
quadratic = abs_error.clamp(max=delta)
linear = (abs_error - quadratic)
corner_loss = 0.5 * quadratic**2 + delta * linear
......@@ -530,8 +530,7 @@ class PartA2BboxHead(BaseModule):
local_roi_boxes[..., 0:3] = 0
rcnn_boxes3d = self.bbox_coder.decode(local_roi_boxes, bbox_pred)
rcnn_boxes3d[..., 0:3] = rotation_3d_in_axis(
rcnn_boxes3d[..., 0:3].unsqueeze(1), (roi_ry + np.pi / 2),
axis=2).squeeze(1)
rcnn_boxes3d[..., 0:3].unsqueeze(1), roi_ry, axis=2).squeeze(1)
rcnn_boxes3d[:, 0:3] += roi_xyz
# post processing
......@@ -542,13 +541,13 @@ class PartA2BboxHead(BaseModule):
cur_box_prob = class_pred[batch_id]
cur_rcnn_boxes3d = rcnn_boxes3d[roi_batch_id == batch_id]
selected = self.multi_class_nms(cur_box_prob, cur_rcnn_boxes3d,
cfg.score_thr, cfg.nms_thr,
img_metas[batch_id],
cfg.use_rotate_nms)
selected_bboxes = cur_rcnn_boxes3d[selected]
selected_label_preds = cur_class_labels[selected]
selected_scores = cur_cls_score[selected]
keep = self.multi_class_nms(cur_box_prob, cur_rcnn_boxes3d,
cfg.score_thr, cfg.nms_thr,
img_metas[batch_id],
cfg.use_rotate_nms)
selected_bboxes = cur_rcnn_boxes3d[keep]
selected_label_preds = cur_class_labels[keep]
selected_scores = cur_cls_score[keep]
result_list.append(
(img_metas[batch_id]['box_type_3d'](selected_bboxes,
......@@ -576,7 +575,7 @@ class PartA2BboxHead(BaseModule):
box_preds (torch.Tensor): Predicted boxes in shape (N, 7+C).
score_thr (float): Threshold of scores.
nms_thr (float): Threshold for NMS.
input_meta (dict): Meta informations of the current sample.
input_meta (dict): Meta information of the current sample.
use_rotate_nms (bool, optional): Whether to use rotated nms.
Defaults to True.
......@@ -620,6 +619,6 @@ class PartA2BboxHead(BaseModule):
dtype=torch.int64,
device=box_preds.device))
selected = torch.cat(
keep = torch.cat(
selected_list, dim=0) if len(selected_list) > 0 else []
return selected
return keep
# Copyright (c) OpenMMLab. All rights reserved.
import numpy as np
import torch
from mmcv.cnn import ConvModule, normal_init
from mmcv.cnn.bricks import build_conv_layer
from mmcv.runner import BaseModule
from torch import nn as nn
from mmdet3d.core.bbox.structures import (LiDARInstance3DBoxes,
rotation_3d_in_axis, xywhr2xyxyr)
from mmdet3d.models.builder import build_loss
from mmdet3d.ops import build_sa_module
from mmdet3d.ops.iou3d.iou3d_utils import nms_gpu, nms_normal_gpu
from mmdet.core import build_bbox_coder, multi_apply
from mmdet.models import HEADS
@HEADS.register_module()
class PointRCNNBboxHead(BaseModule):
"""PointRCNN RoI Bbox head.
Args:
num_classes (int): The number of classes to predict.
in_channels (int): Input channels of point features.
mlp_channels (list[int]): The number of MLP channels.
pred_layer_cfg (dict, optional): Config of classification and
regression prediction layers. Defaults to None.
num_points (tuple, optional): The number of points which each SA
module samples. Defaults to (128, 32, -1).
radius (tuple, optional): Sampling radius of each SA module.
Defaults to (0.2, 0.4, 100).
num_samples (tuple, optional): The number of samples for ball query
in each SA module. Defaults to (64, 64, 64).
sa_channels (tuple, optional): Out channels of each mlp in SA module.
Defaults to ((128, 128, 128), (128, 128, 256), (256, 256, 512)).
bbox_coder (dict, optional): Config dict of box coders.
Defaults to dict(type='DeltaXYZWLHRBBoxCoder').
sa_cfg (dict, optional): Config of set abstraction module, which may
contain the following keys and values:
- pool_mod (str): Pool method ('max' or 'avg') for SA modules.
- use_xyz (bool): Whether to use xyz as a part of features.
- normalize_xyz (bool): Whether to normalize xyz with radii in
each SA module.
Defaults to dict(type='PointSAModule', pool_mod='max',
use_xyz=True).
conv_cfg (dict, optional): Config dict of convolutional layers.
Defaults to dict(type='Conv1d').
norm_cfg (dict, optional): Config dict of normalization layers.
Defaults to dict(type='BN1d').
act_cfg (dict, optional): Config dict of activation layers.
Defaults to dict(type='ReLU').
bias (str, optional): Type of bias. Defaults to 'auto'.
loss_bbox (dict, optional): Config of regression loss function.
Defaults to dict(type='SmoothL1Loss', beta=1.0 / 9.0,
reduction='sum', loss_weight=1.0).
loss_cls (dict, optional): Config of classification loss function.
Defaults to dict(type='CrossEntropyLoss', use_sigmoid=True,
reduction='sum', loss_weight=1.0).
with_corner_loss (bool, optional): Whether using corner loss.
Defaults to True.
init_cfg (dict, optional): Config of initialization. Defaults to None.
"""
def __init__(
self,
num_classes,
in_channels,
mlp_channels,
pred_layer_cfg=None,
num_points=(128, 32, -1),
radius=(0.2, 0.4, 100),
num_samples=(64, 64, 64),
sa_channels=((128, 128, 128), (128, 128, 256), (256, 256, 512)),
bbox_coder=dict(type='DeltaXYZWLHRBBoxCoder'),
sa_cfg=dict(type='PointSAModule', pool_mod='max', use_xyz=True),
conv_cfg=dict(type='Conv1d'),
norm_cfg=dict(type='BN1d'),
act_cfg=dict(type='ReLU'),
bias='auto',
loss_bbox=dict(
type='SmoothL1Loss',
beta=1.0 / 9.0,
reduction='sum',
loss_weight=1.0),
loss_cls=dict(
type='CrossEntropyLoss',
use_sigmoid=True,
reduction='sum',
loss_weight=1.0),
with_corner_loss=True,
init_cfg=None):
super(PointRCNNBboxHead, self).__init__(init_cfg=init_cfg)
self.num_classes = num_classes
self.num_sa = len(sa_channels)
self.with_corner_loss = with_corner_loss
self.conv_cfg = conv_cfg
self.norm_cfg = norm_cfg
self.act_cfg = act_cfg
self.bias = bias
self.loss_bbox = build_loss(loss_bbox)
self.loss_cls = build_loss(loss_cls)
self.bbox_coder = build_bbox_coder(bbox_coder)
self.use_sigmoid_cls = loss_cls.get('use_sigmoid', False)
self.in_channels = in_channels
mlp_channels = [self.in_channels] + mlp_channels
shared_mlps = nn.Sequential()
for i in range(len(mlp_channels) - 1):
shared_mlps.add_module(
f'layer{i}',
ConvModule(
mlp_channels[i],
mlp_channels[i + 1],
kernel_size=(1, 1),
stride=(1, 1),
inplace=False,
conv_cfg=dict(type='Conv2d')))
self.xyz_up_layer = nn.Sequential(*shared_mlps)
c_out = mlp_channels[-1]
self.merge_down_layer = ConvModule(
c_out * 2,
c_out,
kernel_size=(1, 1),
stride=(1, 1),
inplace=False,
conv_cfg=dict(type='Conv2d'))
pre_channels = c_out
self.SA_modules = nn.ModuleList()
sa_in_channel = pre_channels
for sa_index in range(self.num_sa):
cur_sa_mlps = list(sa_channels[sa_index])
cur_sa_mlps = [sa_in_channel] + cur_sa_mlps
sa_out_channel = cur_sa_mlps[-1]
cur_num_points = num_points[sa_index]
if cur_num_points <= 0:
cur_num_points = None
self.SA_modules.append(
build_sa_module(
num_point=cur_num_points,
radius=radius[sa_index],
num_sample=num_samples[sa_index],
mlp_channels=cur_sa_mlps,
cfg=sa_cfg))
sa_in_channel = sa_out_channel
self.cls_convs = self._add_conv_branch(
pred_layer_cfg.in_channels, pred_layer_cfg.cls_conv_channels)
self.reg_convs = self._add_conv_branch(
pred_layer_cfg.in_channels, pred_layer_cfg.reg_conv_channels)
prev_channel = pred_layer_cfg.cls_conv_channels[-1]
self.conv_cls = build_conv_layer(
self.conv_cfg,
in_channels=prev_channel,
out_channels=self.num_classes,
kernel_size=1)
prev_channel = pred_layer_cfg.reg_conv_channels[-1]
self.conv_reg = build_conv_layer(
self.conv_cfg,
in_channels=prev_channel,
out_channels=self.bbox_coder.code_size * self.num_classes,
kernel_size=1)
if init_cfg is None:
self.init_cfg = dict(type='Xavier', layer=['Conv2d', 'Conv1d'])
def _add_conv_branch(self, in_channels, conv_channels):
"""Add shared or separable branch.
Args:
in_channels (int): Input feature channel.
conv_channels (tuple): Middle feature channels.
"""
conv_spec = [in_channels] + list(conv_channels)
# add branch specific conv layers
conv_layers = nn.Sequential()
for i in range(len(conv_spec) - 1):
conv_layers.add_module(
f'layer{i}',
ConvModule(
conv_spec[i],
conv_spec[i + 1],
kernel_size=1,
padding=0,
conv_cfg=self.conv_cfg,
norm_cfg=self.norm_cfg,
act_cfg=self.act_cfg,
bias=self.bias,
inplace=True))
return conv_layers
def init_weights(self):
"""Initialize weights of the head."""
super().init_weights()
for m in self.modules():
if isinstance(m, nn.Conv2d) or isinstance(m, nn.Conv1d):
if m.bias is not None:
nn.init.constant_(m.bias, 0)
normal_init(self.conv_reg.weight, mean=0, std=0.001)
def forward(self, feats):
"""Forward pass.
Args:
feats (torch.Tensor): Features from RCNN modules.
Returns:
tuple[torch.Tensor]: Score of class and bbox predictions.
"""
input_data = feats.clone().detach()
xyz_input = input_data[..., 0:self.in_channels].transpose(
1, 2).unsqueeze(dim=3).contiguous().clone().detach()
xyz_features = self.xyz_up_layer(xyz_input)
rpn_features = input_data[..., self.in_channels:].transpose(
1, 2).unsqueeze(dim=3)
merged_features = torch.cat((xyz_features, rpn_features), dim=1)
merged_features = self.merge_down_layer(merged_features)
l_xyz, l_features = [input_data[..., 0:3].contiguous()], \
[merged_features.squeeze(dim=3)]
for i in range(len(self.SA_modules)):
li_xyz, li_features, cur_indices = \
self.SA_modules[i](l_xyz[i], l_features[i])
l_xyz.append(li_xyz)
l_features.append(li_features)
shared_features = l_features[-1]
x_cls = shared_features
x_reg = shared_features
x_cls = self.cls_convs(x_cls)
rcnn_cls = self.conv_cls(x_cls)
x_reg = self.reg_convs(x_reg)
rcnn_reg = self.conv_reg(x_reg)
rcnn_cls = rcnn_cls.transpose(1, 2).contiguous().squeeze(dim=1)
rcnn_reg = rcnn_reg.transpose(1, 2).contiguous().squeeze(dim=1)
return (rcnn_cls, rcnn_reg)
def loss(self, cls_score, bbox_pred, rois, labels, bbox_targets,
pos_gt_bboxes, reg_mask, label_weights, bbox_weights):
"""Computing losses.
Args:
cls_score (torch.Tensor): Scores of each RoI.
bbox_pred (torch.Tensor): Predictions of bboxes.
rois (torch.Tensor): RoI bboxes.
labels (torch.Tensor): Labels of class.
bbox_targets (torch.Tensor): Target of positive bboxes.
pos_gt_bboxes (torch.Tensor): Ground truths of positive bboxes.
reg_mask (torch.Tensor): Mask for positive bboxes.
label_weights (torch.Tensor): Weights of class loss.
bbox_weights (torch.Tensor): Weights of bbox loss.
Returns:
dict: Computed losses.
- loss_cls (torch.Tensor): Loss of classes.
- loss_bbox (torch.Tensor): Loss of bboxes.
- loss_corner (torch.Tensor): Loss of corners.
"""
losses = dict()
rcnn_batch_size = cls_score.shape[0]
# calculate class loss
cls_flat = cls_score.view(-1)
loss_cls = self.loss_cls(cls_flat, labels, label_weights)
losses['loss_cls'] = loss_cls
# calculate regression loss
code_size = self.bbox_coder.code_size
pos_inds = (reg_mask > 0)
pos_bbox_pred = bbox_pred.view(rcnn_batch_size, -1)[pos_inds].clone()
bbox_weights_flat = bbox_weights[pos_inds].view(-1, 1).repeat(
1, pos_bbox_pred.shape[-1])
loss_bbox = self.loss_bbox(
pos_bbox_pred.unsqueeze(dim=0),
bbox_targets.unsqueeze(dim=0).detach(),
bbox_weights_flat.unsqueeze(dim=0))
losses['loss_bbox'] = loss_bbox
if pos_inds.any() != 0 and self.with_corner_loss:
rois = rois.detach()
pos_roi_boxes3d = rois[..., 1:].view(-1, code_size)[pos_inds]
pos_roi_boxes3d = pos_roi_boxes3d.view(-1, code_size)
batch_anchors = pos_roi_boxes3d.clone().detach()
pos_rois_rotation = pos_roi_boxes3d[..., 6].view(-1)
roi_xyz = pos_roi_boxes3d[..., 0:3].view(-1, 3)
batch_anchors[..., 0:3] = 0
# decode boxes
pred_boxes3d = self.bbox_coder.decode(
batch_anchors,
pos_bbox_pred.view(-1, code_size)).view(-1, code_size)
pred_boxes3d[..., 0:3] = rotation_3d_in_axis(
pred_boxes3d[..., 0:3].unsqueeze(1), (pos_rois_rotation),
axis=2).squeeze(1)
pred_boxes3d[:, 0:3] += roi_xyz
# calculate corner loss
loss_corner = self.get_corner_loss_lidar(pred_boxes3d,
pos_gt_bboxes)
losses['loss_corner'] = loss_corner
else:
losses['loss_corner'] = loss_cls.new_tensor(0)
return losses
def get_corner_loss_lidar(self, pred_bbox3d, gt_bbox3d, delta=1.0):
"""Calculate corner loss of given boxes.
Args:
pred_bbox3d (torch.FloatTensor): Predicted boxes in shape (N, 7).
gt_bbox3d (torch.FloatTensor): Ground truth boxes in shape (N, 7).
delta (float, optional): Huber loss threshold. Defaults to 1.0.
Returns:
torch.FloatTensor: Calculated corner loss in shape (N).
"""
assert pred_bbox3d.shape[0] == gt_bbox3d.shape[0]
# This is a little bit hack here because we assume the box for
# PointRCNN is in LiDAR coordinates
gt_boxes_structure = LiDARInstance3DBoxes(gt_bbox3d)
pred_box_corners = LiDARInstance3DBoxes(pred_bbox3d).corners
gt_box_corners = gt_boxes_structure.corners
# This flip only changes the heading direction of GT boxes
gt_bbox3d_flip = gt_boxes_structure.clone()
gt_bbox3d_flip.tensor[:, 6] += np.pi
gt_box_corners_flip = gt_bbox3d_flip.corners
corner_dist = torch.min(
torch.norm(pred_box_corners - gt_box_corners, dim=2),
torch.norm(pred_box_corners - gt_box_corners_flip, dim=2))
# huber loss
abs_error = corner_dist.abs()
quadratic = abs_error.clamp(max=delta)
linear = (abs_error - quadratic)
corner_loss = 0.5 * quadratic**2 + delta * linear
return corner_loss.mean(dim=1)
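A hedged numeric check of the Huber branch above (illustration only): corner errors below delta are penalized quadratically, errors above it linearly:

import torch

delta = 1.0
abs_error = torch.tensor([0.5, 3.0])
quadratic = abs_error.clamp(max=delta)
loss = 0.5 * quadratic**2 + delta * (abs_error - quadratic)
# tensor([0.1250, 2.5000]): 0.5 * 0.5**2, and 0.5 * 1**2 + 1.0 * (3 - 1)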
def get_targets(self, sampling_results, rcnn_train_cfg, concat=True):
"""Generate targets.
Args:
sampling_results (list[:obj:`SamplingResult`]):
Sampled results from rois.
rcnn_train_cfg (:obj:`ConfigDict`): Training config of rcnn.
concat (bool, optional): Whether to concatenate targets between
batches. Defaults to True.
Returns:
tuple[torch.Tensor]: Targets of boxes and class prediction.
"""
pos_bboxes_list = [res.pos_bboxes for res in sampling_results]
pos_gt_bboxes_list = [res.pos_gt_bboxes for res in sampling_results]
iou_list = [res.iou for res in sampling_results]
targets = multi_apply(
self._get_target_single,
pos_bboxes_list,
pos_gt_bboxes_list,
iou_list,
cfg=rcnn_train_cfg)
(label, bbox_targets, pos_gt_bboxes, reg_mask, label_weights,
bbox_weights) = targets
if concat:
label = torch.cat(label, 0)
bbox_targets = torch.cat(bbox_targets, 0)
pos_gt_bboxes = torch.cat(pos_gt_bboxes, 0)
reg_mask = torch.cat(reg_mask, 0)
label_weights = torch.cat(label_weights, 0)
label_weights /= torch.clamp(label_weights.sum(), min=1.0)
bbox_weights = torch.cat(bbox_weights, 0)
bbox_weights /= torch.clamp(bbox_weights.sum(), min=1.0)
return (label, bbox_targets, pos_gt_bboxes, reg_mask, label_weights,
bbox_weights)
def _get_target_single(self, pos_bboxes, pos_gt_bboxes, ious, cfg):
"""Generate training targets for a single sample.
Args:
pos_bboxes (torch.Tensor): Positive boxes with shape
(N, 7).
pos_gt_bboxes (torch.Tensor): Ground truth boxes with shape
(M, 7).
ious (torch.Tensor): IoU between `pos_bboxes` and `pos_gt_bboxes`
in shape (N, M).
cfg (dict): Training configs.
Returns:
tuple[torch.Tensor]: Target for positive boxes.
(label, bbox_targets, pos_gt_bboxes, reg_mask, label_weights,
bbox_weights)
"""
cls_pos_mask = ious > cfg.cls_pos_thr
cls_neg_mask = ious < cfg.cls_neg_thr
interval_mask = (cls_pos_mask == 0) & (cls_neg_mask == 0)
# iou regression target
label = (cls_pos_mask > 0).float()
label[interval_mask] = (ious[interval_mask] - cfg.cls_neg_thr) / \
(cfg.cls_pos_thr - cfg.cls_neg_thr)
# label weights
label_weights = (label >= 0).float()
# box regression target
reg_mask = pos_bboxes.new_zeros(ious.size(0)).long()
reg_mask[0:pos_gt_bboxes.size(0)] = 1
bbox_weights = (reg_mask > 0).float()
if reg_mask.bool().any():
pos_gt_bboxes_ct = pos_gt_bboxes.clone().detach()
roi_center = pos_bboxes[..., 0:3]
roi_ry = pos_bboxes[..., 6] % (2 * np.pi)
# canonical transformation
pos_gt_bboxes_ct[..., 0:3] -= roi_center
pos_gt_bboxes_ct[..., 6] -= roi_ry
pos_gt_bboxes_ct[..., 0:3] = rotation_3d_in_axis(
pos_gt_bboxes_ct[..., 0:3].unsqueeze(1), -(roi_ry),
axis=2).squeeze(1)
# flip orientation if gt have opposite orientation
ry_label = pos_gt_bboxes_ct[..., 6] % (2 * np.pi) # 0 ~ 2pi
is_opposite = (ry_label > np.pi * 0.5) & (ry_label < np.pi * 1.5)
ry_label[is_opposite] = (ry_label[is_opposite] + np.pi) % (
2 * np.pi) # (0 ~ pi/2, 3pi/2 ~ 2pi)
flag = ry_label > np.pi
ry_label[flag] = ry_label[flag] - np.pi * 2 # (-pi/2, pi/2)
ry_label = torch.clamp(ry_label, min=-np.pi / 2, max=np.pi / 2)
pos_gt_bboxes_ct[..., 6] = ry_label
rois_anchor = pos_bboxes.clone().detach()
rois_anchor[:, 0:3] = 0
rois_anchor[:, 6] = 0
bbox_targets = self.bbox_coder.encode(rois_anchor,
pos_gt_bboxes_ct)
else:
# no fg bbox
bbox_targets = pos_gt_bboxes.new_empty((0, 7))
return (label, bbox_targets, pos_gt_bboxes, reg_mask, label_weights,
bbox_weights)
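A hedged illustration of the orientation canonicalization above: an arbitrary relative yaw is folded into (-pi/2, pi/2], flipping targets that face the opposite way, so the regression branch never has to span a full turn:

import numpy as np
import torch

ry = torch.tensor([0.3, 2.0, 4.0, 6.0]) % (2 * np.pi)
opposite = (ry > np.pi * 0.5) & (ry < np.pi * 1.5)
ry[opposite] = (ry[opposite] + np.pi) % (2 * np.pi)
ry[ry > np.pi] -= 2 * np.pi
ry = ry.clamp(min=-np.pi / 2, max=np.pi / 2)
# tensor([ 0.3000, -1.1416,  0.8584, -0.2832])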
def get_bboxes(self,
rois,
cls_score,
bbox_pred,
class_labels,
img_metas,
cfg=None):
"""Generate bboxes from bbox head predictions.
Args:
rois (torch.Tensor): RoI bounding boxes.
cls_score (torch.Tensor): Scores of bounding boxes.
bbox_pred (torch.Tensor): Bounding box predictions.
class_labels (torch.Tensor): Labels of classes.
img_metas (list[dict]): Point cloud and image's meta info.
cfg (:obj:`ConfigDict`, optional): Testing config.
Defaults to None.
Returns:
list[tuple]: Decoded bbox, scores and labels after nms.
"""
roi_batch_id = rois[..., 0]
roi_boxes = rois[..., 1:] # boxes without batch id
batch_size = int(roi_batch_id.max().item() + 1)
# decode boxes
roi_ry = roi_boxes[..., 6].view(-1)
roi_xyz = roi_boxes[..., 0:3].view(-1, 3)
local_roi_boxes = roi_boxes.clone().detach()
local_roi_boxes[..., 0:3] = 0
rcnn_boxes3d = self.bbox_coder.decode(local_roi_boxes, bbox_pred)
rcnn_boxes3d[..., 0:3] = rotation_3d_in_axis(
rcnn_boxes3d[..., 0:3].unsqueeze(1), (roi_ry), axis=2).squeeze(1)
rcnn_boxes3d[:, 0:3] += roi_xyz
# post processing
result_list = []
for batch_id in range(batch_size):
cur_class_labels = class_labels[batch_id]
cur_cls_score = cls_score[roi_batch_id == batch_id].view(-1)
cur_box_prob = cur_cls_score.unsqueeze(1)  # (N, 1) per-box scores for NMS
cur_rcnn_boxes3d = rcnn_boxes3d[roi_batch_id == batch_id]
keep = self.multi_class_nms(cur_box_prob, cur_rcnn_boxes3d,
cfg.score_thr, cfg.nms_thr,
img_metas[batch_id],
cfg.use_rotate_nms)
selected_bboxes = cur_rcnn_boxes3d[keep]
selected_label_preds = cur_class_labels[keep]
selected_scores = cur_cls_score[keep]
result_list.append(
(img_metas[batch_id]['box_type_3d'](selected_bboxes,
self.bbox_coder.code_size),
selected_scores, selected_label_preds))
return result_list
def multi_class_nms(self,
box_probs,
box_preds,
score_thr,
nms_thr,
input_meta,
use_rotate_nms=True):
"""Multi-class NMS for box head.
Note:
This function has large overlap with the `box3d_multiclass_nms`
implemented in `mmdet3d.core.post_processing`. We are considering
merging these two functions in the future.
Args:
box_probs (torch.Tensor): Predicted box probabilities in
shape (N, num_classes).
box_preds (torch.Tensor): Predicted boxes in shape (N, 7+C).
score_thr (float): Threshold of scores.
nms_thr (float): Threshold for NMS.
input_meta (dict): Meta information of the current sample.
use_rotate_nms (bool, optional): Whether to use rotated nms.
Defaults to True.
Returns:
torch.Tensor: Selected indices.
"""
if use_rotate_nms:
nms_func = nms_gpu
else:
nms_func = nms_normal_gpu
assert box_probs.shape[
1] == self.num_classes, f'box_probs shape: {str(box_probs.shape)}'
selected_list = []
selected_labels = []
boxes_for_nms = xywhr2xyxyr(input_meta['box_type_3d'](
box_preds, self.bbox_coder.code_size).bev)
score_thresh = score_thr if isinstance(
score_thr, list) else [score_thr for x in range(self.num_classes)]
nms_thresh = nms_thr if isinstance(
nms_thr, list) else [nms_thr for x in range(self.num_classes)]
for k in range(0, self.num_classes):
class_scores_keep = box_probs[:, k] >= score_thresh[k]
if class_scores_keep.int().sum() > 0:
original_idxs = class_scores_keep.nonzero(
as_tuple=False).view(-1)
cur_boxes_for_nms = boxes_for_nms[class_scores_keep]
cur_rank_scores = box_probs[class_scores_keep, k]
cur_selected = nms_func(cur_boxes_for_nms, cur_rank_scores,
nms_thresh[k])
if cur_selected.shape[0] == 0:
continue
selected_list.append(original_idxs[cur_selected])
selected_labels.append(
torch.full([cur_selected.shape[0]],
k + 1,
dtype=torch.int64,
device=box_preds.device))
keep = torch.cat(
selected_list, dim=0) if len(selected_list) > 0 else []
return keep
......@@ -65,15 +65,15 @@ class H3DRoIHead(Base3DRoIHead):
feats_dict (dict): Contains features from the first stage.
img_metas (list[dict]): Contain pcd and img's meta info.
points (list[torch.Tensor]): Input points.
gt_bboxes_3d (list[:obj:`BaseInstance3DBoxes`]): Ground truth \
gt_bboxes_3d (list[:obj:`BaseInstance3DBoxes`]): Ground truth
bboxes of each sample.
gt_labels_3d (list[torch.Tensor]): Labels of each sample.
pts_semantic_mask (None | list[torch.Tensor]): Point-wise
pts_semantic_mask (list[torch.Tensor]): Point-wise
semantic mask.
pts_instance_mask (None | list[torch.Tensor]): Point-wise
pts_instance_mask (list[torch.Tensor]): Point-wise
instance mask.
gt_bboxes_ignore (None | list[torch.Tensor]): Specify
which bounding.
gt_bboxes_ignore (list[torch.Tensor]): Specify
which bounding boxes to ignore.
Returns:
dict: losses from each head.
......