[Refactor] Refactor code structure and docstrings (#803)

* refactor points_in_boxes * Merge same functions of three boxes * More docstring fixes and unify x/y/z size * Add "optional" and fix "Default" * Add "optional" and fix "Default" * Add "optional" and fix "Default" * Add "optional" and fix "Default" * Add "optional" and fix "Default" * Remove None in function param type * Fix unittest * Add comments for NMS functions * Merge methods of Points * Add unittest * Add optional and default value * Fix box conversion and add unittest * Fix comments * Add unit test * Indent * Fix CI * Remove useless \\ * Remove useless \\ * Remove useless \\ * Remove useless \\ * Remove useless \\ * Add unit test for box bev * More unit tests and refine docstrings in box_np_ops * Fix comment * Add deprecation warning

[Refactor] Refactor code structure and docstrings (#803)
* refactor points_in_boxes * Merge same functions of three boxes * More docstring fixes and unify x/y/z size * Add "optional" and fix "Default" * Add "optional" and fix "Default" * Add "optional" and fix "Default" * Add "optional" and fix "Default" * Add "optional" and fix "Default" * Remove None in function param type * Fix unittest * Add comments for NMS functions * Merge methods of Points * Add unittest * Add optional and default value * Fix box conversion and add unittest * Fix comments * Add unit test * Indent * Fix CI * Remove useless \\ * Remove useless \\ * Remove useless \\ * Remove useless \\ * Remove useless \\ * Add unit test for box bev * More unit tests and refine docstrings in box_np_ops * Fix comment * Add deprecation warning
53435c62 · Yezhen Cong · Tai-Wang · 4f36084f · 53435c62 · 53435c62
Commit 53435c62 authored Aug 11, 2021 by Yezhen Cong Committed by Tai-Wang Sep 24, 2021
20 changed files
--- a/mmdet3d/models/model_utils/vote_module.py
+++ b/mmdet3d/models/model_utils/vote_module.py
@@ -14,22 +14,25 @@ class VoteModule(nn.Module):

    Args:
        in_channels (int): Number of channels of seed point features.
-        vote_per_seed (int): Number of votes generated from each seed point.
-        gt_per_seed (int): Number of ground truth votes generated
-            from each seed point.
-        num_points (int): Number of points to be used for voting.
-        conv_channels (tuple[int]): Out channels of vote
-            generating convolution.
-        conv_cfg (dict): Config of convolution.
+        vote_per_seed (int, optional): Number of votes generated from
+            each seed point. Default: 1.
+        gt_per_seed (int, optional): Number of ground truth votes generated
+            from each seed point. Default: 3.
+        num_points (int, optional): Number of points to be used for voting.
+            Default: 1.
+        conv_channels (tuple[int], optional): Out channels of vote
+            generating convolution. Default: (16, 16).
+        conv_cfg (dict, optional): Config of convolution.
            Default: dict(type='Conv1d').
-        norm_cfg (dict): Config of normalization.
+        norm_cfg (dict, optional): Config of normalization.
            Default: dict(type='BN1d').
-        norm_feats (bool): Whether to normalize features.
+        norm_feats (bool, optional): Whether to normalize features.
            Default: True.
-        with_res_feat (bool): Whether to predict residual features.
+        with_res_feat (bool, optional): Whether to predict residual features.
            Default: True.
-        vote_xyz_range (list[float], None): The range of points translation.
-        vote_loss (dict): Config of vote loss.
+        vote_xyz_range (list[float], optional):
+            The range of points translation. Default: None.
+        vote_loss (dict, optional): Config of vote loss. Default: None.
    """

    def __init__(self,
@@ -95,10 +98,10 @@ class VoteModule(nn.Module):
        Returns:
            tuple[torch.Tensor]:

-                - vote_points: Voted xyz based on the seed points \
+                - vote_points: Voted xyz based on the seed points
                    with shape (B, M, 3), ``M=num_seed*vote_per_seed``.
-                - vote_features: Voted features based on the seed points with \
-                    shape (B, C, M) where ``M=num_seed*vote_per_seed``, \
+                - vote_features: Voted features based on the seed points with
+                    shape (B, C, M) where ``M=num_seed*vote_per_seed``,
                    ``C=vote_feature_dim``.
        """
        if self.num_points != -1:

--- a/mmdet3d/models/roi_heads/bbox_heads/h3d_bbox_head.py
+++ b/mmdet3d/models/roi_heads/bbox_heads/h3d_bbox_head.py
@@ -324,16 +324,16 @@ class H3DBboxHead(BaseModule):
        Args:
            bbox_preds (dict): Predictions from forward of h3d bbox head.
            points (list[torch.Tensor]): Input points.
-            gt_bboxes_3d (list[:obj:`BaseInstance3DBoxes`]): Ground truth \
+            gt_bboxes_3d (list[:obj:`BaseInstance3DBoxes`]): Ground truth
                bboxes of each sample.
            gt_labels_3d (list[torch.Tensor]): Labels of each sample.
-            pts_semantic_mask (None | list[torch.Tensor]): Point-wise
+            pts_semantic_mask (list[torch.Tensor]): Point-wise
                semantic mask.
-            pts_instance_mask (None | list[torch.Tensor]): Point-wise
+            pts_instance_mask (list[torch.Tensor]): Point-wise
                instance mask.
            img_metas (list[dict]): Contain pcd and img's meta info.
            rpn_targets (tuple): Targets generated by rpn head.
-            gt_bboxes_ignore (None | list[torch.Tensor]): Specify
+            gt_bboxes_ignore (list[torch.Tensor]): Specify
                which bounding boxes to ignore.

        Returns:
@@ -502,7 +502,7 @@ class H3DBboxHead(BaseModule):
            box_dim=bbox.shape[-1],
            with_yaw=self.bbox_coder.with_rot,
            origin=(0.5, 0.5, 0.5))
-        box_indices = bbox.points_in_boxes_batch(points)
+        box_indices = bbox.points_in_boxes_all(points)

        corner3d = bbox.corners
        minmax_box3d = corner3d.new(torch.Size((corner3d.shape[0], 6)))
@@ -560,25 +560,25 @@ class H3DBboxHead(BaseModule):

        Args:
            bbox_preds (dict): Predictions from forward of vote head.
-            size_class_targets (torch.Tensor): Ground truth \
+            size_class_targets (torch.Tensor): Ground truth
                size class of each prediction bounding box.
-            size_res_targets (torch.Tensor): Ground truth \
+            size_res_targets (torch.Tensor): Ground truth
                size residual of each prediction bounding box.
-            dir_class_targets (torch.Tensor): Ground truth \
+            dir_class_targets (torch.Tensor): Ground truth
                direction class of each prediction bounding box.
-            dir_res_targets (torch.Tensor): Ground truth \
+            dir_res_targets (torch.Tensor): Ground truth
                direction residual of each prediction bounding box.
-            center_targets (torch.Tensor): Ground truth center \
+            center_targets (torch.Tensor): Ground truth center
                of each prediction bounding box.
-            mask_targets (torch.Tensor): Validation of each \
+            mask_targets (torch.Tensor): Validation of each
                prediction bounding box.
-            objectness_targets (torch.Tensor): Ground truth \
+            objectness_targets (torch.Tensor): Ground truth
                objectness label of each prediction bounding box.
-            objectness_weights (torch.Tensor): Weights of objectness \
+            objectness_weights (torch.Tensor): Weights of objectness
                loss for each prediction bounding box.
-            box_loss_weights (torch.Tensor): Weights of regression \
+            box_loss_weights (torch.Tensor): Weights of regression
                loss for each prediction bounding box.
-            valid_gt_weights (torch.Tensor): Validation of each \
+            valid_gt_weights (torch.Tensor): Validation of each
                ground truth bounding box.

        Returns:
@@ -663,12 +663,12 @@ class H3DBboxHead(BaseModule):

        Args:
            points (list[torch.Tensor]): Points of each batch.
-            gt_bboxes_3d (list[:obj:`BaseInstance3DBoxes`]): Ground truth \
+            gt_bboxes_3d (list[:obj:`BaseInstance3DBoxes`]): Ground truth
                bboxes of each batch.
            gt_labels_3d (list[torch.Tensor]): Labels of each batch.
-            pts_semantic_mask (None | list[torch.Tensor]): Point-wise semantic
+            pts_semantic_mask (list[torch.Tensor]): Point-wise semantic
                label of each batch.
-            pts_instance_mask (None | list[torch.Tensor]): Point-wise instance
+            pts_instance_mask (list[torch.Tensor]): Point-wise instance
                label of each batch.
            bbox_preds (torch.Tensor): Bounding box predictions of vote head.

@@ -769,22 +769,22 @@ class H3DBboxHead(BaseModule):

        Args:
            points (torch.Tensor): Points of each batch.
-            gt_bboxes_3d (:obj:`BaseInstance3DBoxes`): Ground truth \
+            gt_bboxes_3d (:obj:`BaseInstance3DBoxes`): Ground truth
                boxes of each batch.
            gt_labels_3d (torch.Tensor): Labels of each batch.
-            pts_semantic_mask (None | torch.Tensor): Point-wise semantic
+            pts_semantic_mask (torch.Tensor): Point-wise semantic
                label of each batch.
-            pts_instance_mask (None | torch.Tensor): Point-wise instance
+            pts_instance_mask (torch.Tensor): Point-wise instance
                label of each batch.
            aggregated_points (torch.Tensor): Aggregated points from
                vote aggregation layer.
            pred_surface_center (torch.Tensor): Prediction of surface center.
            pred_line_center (torch.Tensor): Prediction of line center.
-            pred_obj_surface_center (torch.Tensor): Objectness prediction \
+            pred_obj_surface_center (torch.Tensor): Objectness prediction
                of surface center.
-            pred_obj_line_center (torch.Tensor): Objectness prediction of \
+            pred_obj_line_center (torch.Tensor): Objectness prediction of
                line center.
-            pred_surface_sem (torch.Tensor): Semantic prediction of \
+            pred_surface_sem (torch.Tensor): Semantic prediction of
                surface center.
            pred_line_sem (torch.Tensor): Semantic prediction of line center.
        Returns:

--- a/mmdet3d/models/roi_heads/h3d_roi_head.py
+++ b/mmdet3d/models/roi_heads/h3d_roi_head.py
@@ -65,15 +65,15 @@ class H3DRoIHead(Base3DRoIHead):
            feats_dict (dict): Contains features from the first stage.
            img_metas (list[dict]): Contain pcd and img's meta info.
            points (list[torch.Tensor]): Input points.
-            gt_bboxes_3d (list[:obj:`BaseInstance3DBoxes`]): Ground truth \
+            gt_bboxes_3d (list[:obj:`BaseInstance3DBoxes`]): Ground truth
                bboxes of each sample.
            gt_labels_3d (list[torch.Tensor]): Labels of each sample.
-            pts_semantic_mask (None | list[torch.Tensor]): Point-wise
+            pts_semantic_mask (list[torch.Tensor]): Point-wise
                semantic mask.
-            pts_instance_mask (None | list[torch.Tensor]): Point-wise
+            pts_instance_mask (list[torch.Tensor]): Point-wise
                instance mask.
-            gt_bboxes_ignore (None | list[torch.Tensor]): Specify
-                which bounding.
+            gt_bboxes_ignore (list[torch.Tensor]): Specify
+                which bounding boxes to ignore.

        Returns:
            dict: losses from each head.

--- a/mmdet3d/models/roi_heads/mask_heads/pointwise_semantic_head.py
+++ b/mmdet3d/models/roi_heads/mask_heads/pointwise_semantic_head.py
@@ -83,15 +83,15 @@ class PointwiseSemanticHead(BaseModule):
        sample.

        Args:
-            voxel_centers (torch.Tensor): The center of voxels in shape \
+            voxel_centers (torch.Tensor): The center of voxels in shape
                (voxel_num, 3).
-            gt_bboxes_3d (:obj:`BaseInstance3DBoxes`): Ground truth boxes in \
+            gt_bboxes_3d (:obj:`BaseInstance3DBoxes`): Ground truth boxes in
                shape (box_num, 7).
-            gt_labels_3d (torch.Tensor): Class labels of ground truths in \
+            gt_labels_3d (torch.Tensor): Class labels of ground truths in
                shape (box_num).

        Returns:
-            tuple[torch.Tensor]: Segmentation targets with shape [voxel_num] \
+            tuple[torch.Tensor]: Segmentation targets with shape [voxel_num],
                part prediction targets with shape [voxel_num, 3]
        """
        gt_bboxes_3d = gt_bboxes_3d.to(voxel_centers.device)
@@ -99,8 +99,8 @@ class PointwiseSemanticHead(BaseModule):

        part_targets = voxel_centers.new_zeros((voxel_centers.shape[0], 3),
                                               dtype=torch.float32)
-        box_idx = gt_bboxes_3d.points_in_boxes(voxel_centers)
-        enlarge_box_idx = enlarged_gt_boxes.points_in_boxes(
+        box_idx = gt_bboxes_3d.points_in_boxes_part(voxel_centers)
+        enlarge_box_idx = enlarged_gt_boxes.points_in_boxes_part(
            voxel_centers).long()

        gt_labels_pad = F.pad(
@@ -131,19 +131,19 @@ class PointwiseSemanticHead(BaseModule):
        """generate segmentation and part prediction targets.

        Args:
-            voxel_centers (torch.Tensor): The center of voxels in shape \
+            voxel_centers (torch.Tensor): The center of voxels in shape
                (voxel_num, 3).
-            gt_bboxes_3d (:obj:`BaseInstance3DBoxes`): Ground truth boxes in \
+            gt_bboxes_3d (:obj:`BaseInstance3DBoxes`): Ground truth boxes in
                shape (box_num, 7).
-            gt_labels_3d (torch.Tensor): Class labels of ground truths in \
+            gt_labels_3d (torch.Tensor): Class labels of ground truths in
                shape (box_num).

        Returns:
            dict: Prediction targets

-                - seg_targets (torch.Tensor): Segmentation targets \
+                - seg_targets (torch.Tensor): Segmentation targets
                    with shape [voxel_num].
-                - part_targets (torch.Tensor): Part prediction targets \
+                - part_targets (torch.Tensor): Part prediction targets
                    with shape [voxel_num, 3].
        """
        batch_size = len(gt_labels_3d)

--- a/mmdet3d/models/roi_heads/mask_heads/primitive_head.py
+++ b/mmdet3d/models/roi_heads/mask_heads/primitive_head.py
@@ -198,15 +198,15 @@ class PrimitiveHead(BaseModule):
        Args:
            bbox_preds (dict): Predictions from forward of primitive head.
            points (list[torch.Tensor]): Input points.
-            gt_bboxes_3d (list[:obj:`BaseInstance3DBoxes`]): Ground truth \
+            gt_bboxes_3d (list[:obj:`BaseInstance3DBoxes`]): Ground truth
                bboxes of each sample.
            gt_labels_3d (list[torch.Tensor]): Labels of each sample.
-            pts_semantic_mask (None | list[torch.Tensor]): Point-wise
+            pts_semantic_mask (list[torch.Tensor]): Point-wise
                semantic mask.
-            pts_instance_mask (None | list[torch.Tensor]): Point-wise
+            pts_instance_mask (list[torch.Tensor]): Point-wise
                instance mask.
            img_metas (list[dict]): Contain pcd and img's meta info.
-            gt_bboxes_ignore (None | list[torch.Tensor]): Specify
+            gt_bboxes_ignore (list[torch.Tensor]): Specify
                which bounding boxes to ignore.

        Returns:
@@ -266,12 +266,12 @@ class PrimitiveHead(BaseModule):

        Args:
            points (list[torch.Tensor]): Points of each batch.
-            gt_bboxes_3d (list[:obj:`BaseInstance3DBoxes`]): Ground truth \
+            gt_bboxes_3d (list[:obj:`BaseInstance3DBoxes`]): Ground truth
                bboxes of each batch.
            gt_labels_3d (list[torch.Tensor]): Labels of each batch.
-            pts_semantic_mask (None | list[torch.Tensor]): Point-wise semantic
+            pts_semantic_mask (list[torch.Tensor]): Point-wise semantic
                label of each batch.
-            pts_instance_mask (None | list[torch.Tensor]): Point-wise instance
+            pts_instance_mask (list[torch.Tensor]): Point-wise instance
                label of each batch.
            bbox_preds (dict): Predictions from forward of primitive head.

@@ -333,12 +333,12 @@ class PrimitiveHead(BaseModule):

        Args:
            points (torch.Tensor): Points of each batch.
-            gt_bboxes_3d (:obj:`BaseInstance3DBoxes`): Ground truth \
+            gt_bboxes_3d (:obj:`BaseInstance3DBoxes`): Ground truth
                boxes of each batch.
            gt_labels_3d (torch.Tensor): Labels of each batch.
-            pts_semantic_mask (None | torch.Tensor): Point-wise semantic
+            pts_semantic_mask (torch.Tensor): Point-wise semantic
                label of each batch.
-            pts_instance_mask (None | torch.Tensor): Point-wise instance
+            pts_instance_mask (torch.Tensor): Point-wise instance
                label of each batch.

        Returns:
@@ -355,7 +355,7 @@ class PrimitiveHead(BaseModule):

        # Generate pts_semantic_mask and pts_instance_mask when they are None
        if pts_semantic_mask is None or pts_instance_mask is None:
-            points2box_mask = gt_bboxes_3d.points_in_boxes_batch(points)
+            points2box_mask = gt_bboxes_3d.points_in_boxes_all(points)
            assignment = points2box_mask.argmax(1)
            background_mask = points2box_mask.max(1)[0] == 0


--- a/mmdet3d/models/segmentors/base.py
+++ b/mmdet3d/models/segmentors/base.py
@@ -78,7 +78,7 @@ class Base3DSegmentor(BaseSegmentor):
        Args:
            data (list[dict]): Input points and the information of the sample.
            result (list[dict]): Prediction results.
-            palette (list[list[int]]] | np.ndarray | None): The palette of
+            palette (list[list[int]] | np.ndarray): The palette of
                segmentation map. If None is given, random palette will be
                generated. Default: None
            out_dir (str): Output directory of visualization result.

--- a/mmdet3d/models/segmentors/encoder_decoder.py
+++ b/mmdet3d/models/segmentors/encoder_decoder.py
@@ -187,7 +187,7 @@ class EncoderDecoder3D(Base3DSegmentor):
                          use_normalized_coord=False):
        """Generating model input.

-        Generate input by subtracting patch center and adding additional \
+        Generate input by subtracting patch center and adding additional
            features. Currently support colors and normalized xyz as features.

        Args:
@@ -195,7 +195,7 @@ class EncoderDecoder3D(Base3DSegmentor):
            patch_center (torch.Tensor): Center coordinate of the patch.
            coord_max (torch.Tensor): Max coordinate of all 3D points.
            feats (torch.Tensor): Features of sampled points of shape [S, C].
-            use_normalized_coord (bool, optional): Whether to use normalized \
+            use_normalized_coord (bool, optional): Whether to use normalized
                xyz as additional features. Defaults to False.

        Returns:
@@ -233,17 +233,17 @@ class EncoderDecoder3D(Base3DSegmentor):
            block_size (float, optional): Size of a patch to sample.
            sample_rate (float, optional): Stride used in sliding patch.
                Defaults to 0.5.
-            use_normalized_coord (bool, optional): Whether to use normalized \
+            use_normalized_coord (bool, optional): Whether to use normalized
                xyz as additional features. Defaults to False.
            eps (float, optional): A value added to patch boundary to guarantee
-                points coverage. Default 1e-3.
+                points coverage. Defaults to 1e-3.

        Returns:
            tuple[torch.Tensor]:

-                - patch_points (torch.Tensor): Points of different patches of \
+                - patch_points (torch.Tensor): Points of different patches of
                    shape [K, N, 3+C].
-                - patch_idxs (torch.Tensor): Index of each point in \
+                - patch_idxs (torch.Tensor): Index of each point in
                    `patch_points`, of shape [K, N].
        """
        device = points.device

--- a/mmdet3d/models/utils/clip_sigmoid.py
+++ b/mmdet3d/models/utils/clip_sigmoid.py
@@ -7,8 +7,8 @@ def clip_sigmoid(x, eps=1e-4):

    Args:
        x (torch.Tensor): Input feature map with the shape of [B, N, H, W].
-        eps (float): Lower bound of the range to be clamped to. Defaults
-            to 1e-4.
+        eps (float, optional): Lower bound of the range to be clamped to.
+            Defaults to 1e-4.

    Returns:
        torch.Tensor: Feature map after sigmoid.

--- a/mmdet3d/models/utils/mlp.py
+++ b/mmdet3d/models/utils/mlp.py
@@ -10,15 +10,15 @@ class MLP(BaseModule):
    Pass features (B, C, N) through an MLP.

    Args:
-        in_channels (int): Number of channels of input features.
+        in_channels (int, optional): Number of channels of input features.
            Default: 18.
-        conv_channels (tuple[int]): Out channels of the convolution.
+        conv_channels (tuple[int], optional): Out channels of the convolution.
            Default: (256, 256).
-        conv_cfg (dict): Config of convolution.
+        conv_cfg (dict, optional): Config of convolution.
            Default: dict(type='Conv1d').
-        norm_cfg (dict): Config of normalization.
+        norm_cfg (dict, optional): Config of normalization.
            Default: dict(type='BN1d').
-        act_cfg (dict): Config of activation.
+        act_cfg (dict, optional): Config of activation.
            Default: dict(type='ReLU').
    """


--- a/mmdet3d/models/voxel_encoders/pillar_encoder.py
+++ b/mmdet3d/models/voxel_encoders/pillar_encoder.py
@@ -33,7 +33,7 @@ class PillarFeatureNet(nn.Module):
            Defaults to dict(type='BN1d', eps=1e-3, momentum=0.01).
        mode (str, optional): The mode to gather point features. Options are
            'max' or 'avg'. Defaults to 'max'.
-        legacy (bool): Whether to use the new behavior or
+        legacy (bool, optional): Whether to use the new behavior or
            the original behavior. Defaults to True.
    """


--- a/mmdet3d/models/voxel_encoders/utils.py
+++ b/mmdet3d/models/voxel_encoders/utils.py
@@ -113,11 +113,12 @@ class PFNLayer(nn.Module):
    Args:
        in_channels (int): Number of input channels.
        out_channels (int): Number of output channels.
-        norm_cfg (dict): Config dict of normalization layers
-        last_layer (bool): If last_layer, there is no concatenation of
-            features.
-        mode (str): Pooling model to gather features inside voxels.
-            Default to 'max'.
+        norm_cfg (dict, optional): Config dict of normalization layers.
+            Defaults to dict(type='BN1d', eps=1e-3, momentum=0.01).
+        last_layer (bool, optional): If last_layer, there is no
+            concatenation of features. Defaults to False.
+        mode (str, optional): Pooling mode to gather features inside voxels.
+            Defaults to 'max'.
    """

    def __init__(self,

--- a/mmdet3d/models/voxel_encoders/voxel_encoder.py
+++ b/mmdet3d/models/voxel_encoders/voxel_encoder.py
@@ -17,7 +17,7 @@ class HardSimpleVFE(nn.Module):
    It simply averages the values of points in a voxel.

    Args:
-        num_features (int): Number of features to use. Default: 4.
+        num_features (int, optional): Number of features to use. Default: 4.
    """

    def __init__(self, num_features=4):
@@ -93,25 +93,27 @@ class DynamicVFE(nn.Module):
    The number of points inside the voxel varies.

    Args:
-        in_channels (int): Input channels of VFE. Defaults to 4.
-        feat_channels (list(int)): Channels of features in VFE.
-        with_distance (bool): Whether to use the L2 distance of points to the
-            origin point. Default False.
-        with_cluster_center (bool): Whether to use the distance to cluster
-            center of points inside a voxel. Default to False.
-        with_voxel_center (bool): Whether to use the distance to center of
-            voxel for each points inside a voxel. Default to False.
-        voxel_size (tuple[float]): Size of a single voxel. Default to
-            (0.2, 0.2, 4).
-        point_cloud_range (tuple[float]): The range of points or voxels.
-            Default to (0, -40, -3, 70.4, 40, 1).
-        norm_cfg (dict): Config dict of normalization layers.
-        mode (str): The mode when pooling features of points inside a voxel.
-            Available options include 'max' and 'avg'. Default to 'max'.
-        fusion_layer (dict | None): The config dict of fusion layer used in
-            multi-modal detectors. Default to None.
-        return_point_feats (bool): Whether to return the features of each
-            points. Default to False.
+        in_channels (int, optional): Input channels of VFE. Defaults to 4.
+        feat_channels (list[int], optional): Channels of features in VFE.
+        with_distance (bool, optional): Whether to use the L2 distance of
+            points to the origin point. Defaults to False.
+        with_cluster_center (bool, optional): Whether to use the distance
+            to cluster center of points inside a voxel. Defaults to False.
+        with_voxel_center (bool, optional): Whether to use the distance
+            to center of voxel for each point inside a voxel.
+            Defaults to False.
+        voxel_size (tuple[float], optional): Size of a single voxel.
+            Defaults to (0.2, 0.2, 4).
+        point_cloud_range (tuple[float], optional): The range of points
+            or voxels. Defaults to (0, -40, -3, 70.4, 40, 1).
+        norm_cfg (dict, optional): Config dict of normalization layers.
+        mode (str, optional): The mode when pooling features of points
+            inside a voxel. Available options include 'max' and 'avg'.
+            Defaults to 'max'.
+        fusion_layer (dict, optional): The config dict of fusion
+            layer used in multi-modal detectors. Defaults to None.
+        return_point_feats (bool, optional): Whether to return the features
+            of each point. Defaults to False.
    """

    def __init__(self,
@@ -292,25 +294,26 @@ class HardVFE(nn.Module):
    image feature into voxel features in a point-wise manner.

    Args:
-        in_channels (int): Input channels of VFE. Defaults to 4.
-        feat_channels (list(int)): Channels of features in VFE.
-        with_distance (bool): Whether to use the L2 distance of points to the
-            origin point. Default False.
-        with_cluster_center (bool): Whether to use the distance to cluster
-            center of points inside a voxel. Default to False.
-        with_voxel_center (bool): Whether to use the distance to center of
-            voxel for each points inside a voxel. Default to False.
-        voxel_size (tuple[float]): Size of a single voxel. Default to
-            (0.2, 0.2, 4).
-        point_cloud_range (tuple[float]): The range of points or voxels.
-            Default to (0, -40, -3, 70.4, 40, 1).
-        norm_cfg (dict): Config dict of normalization layers.
-        mode (str): The mode when pooling features of points inside a voxel.
-            Available options include 'max' and 'avg'. Default to 'max'.
-        fusion_layer (dict | None): The config dict of fusion layer used in
-            multi-modal detectors. Default to None.
-        return_point_feats (bool): Whether to return the features of each
-            points. Default to False.
+        in_channels (int, optional): Input channels of VFE. Defaults to 4.
+        feat_channels (list[int], optional): Channels of features in VFE.
+        with_distance (bool, optional): Whether to use the L2 distance
+            of points to the origin point. Defaults to False.
+        with_cluster_center (bool, optional): Whether to use the distance
+            to cluster center of points inside a voxel. Defaults to False.
+        with_voxel_center (bool, optional): Whether to use the distance to
+            center of voxel for each point inside a voxel. Defaults to False.
+        voxel_size (tuple[float], optional): Size of a single voxel.
+            Defaults to (0.2, 0.2, 4).
+        point_cloud_range (tuple[float], optional): The range of points
+            or voxels. Defaults to (0, -40, -3, 70.4, 40, 1).
+        norm_cfg (dict, optional): Config dict of normalization layers.
+        mode (str, optional): The mode when pooling features of points inside a
+            voxel. Available options include 'max' and 'avg'.
+            Defaults to 'max'.
+        fusion_layer (dict, optional): The config dict of fusion layer
+            used in multi-modal detectors. Defaults to None.
+        return_point_feats (bool, optional): Whether to return the
+            features of each point. Defaults to False.
    """

    def __init__(self,

--- a/mmdet3d/ops/__init__.py
+++ b/mmdet3d/ops/__init__.py
@@ -17,8 +17,8 @@ from .pointnet_modules import (PAConvCUDASAModule, PAConvCUDASAModuleMSG,
                               PAConvSAModule, PAConvSAModuleMSG,
                               PointFPModule, PointSAModule, PointSAModuleMSG,
                               build_sa_module)
-from .roiaware_pool3d import (RoIAwarePool3d, points_in_boxes_batch,
-                              points_in_boxes_cpu, points_in_boxes_gpu)
+from .roiaware_pool3d import (RoIAwarePool3d, points_in_boxes_all,
+                              points_in_boxes_cpu, points_in_boxes_part)
 from .sparse_block import (SparseBasicBlock, SparseBottleneck,
                           make_sparse_convmodule)
 from .voxel import DynamicScatter, Voxelization, dynamic_scatter, voxelization
@@ -29,12 +29,12 @@ __all__ = [
    'NaiveSyncBatchNorm2d', 'batched_nms', 'Voxelization', 'voxelization',
    'dynamic_scatter', 'DynamicScatter', 'sigmoid_focal_loss',
    'SigmoidFocalLoss', 'SparseBasicBlock', 'SparseBottleneck',
-    'RoIAwarePool3d', 'points_in_boxes_gpu', 'points_in_boxes_cpu',
+    'RoIAwarePool3d', 'points_in_boxes_part', 'points_in_boxes_cpu',
    'make_sparse_convmodule', 'ball_query', 'knn', 'furthest_point_sample',
    'furthest_point_sample_with_dist', 'three_interpolate', 'three_nn',
    'gather_points', 'grouping_operation', 'group_points', 'GroupAll',
    'QueryAndGroup', 'PointSAModule', 'PointSAModuleMSG', 'PointFPModule',
-    'points_in_boxes_batch', 'get_compiler_version', 'assign_score_withk',
+    'points_in_boxes_all', 'get_compiler_version', 'assign_score_withk',
    'get_compiling_cuda_version', 'Points_Sampler', 'build_sa_module',
    'PAConv', 'PAConvCUDA', 'PAConvSAModuleMSG', 'PAConvSAModule',
    'PAConvCUDASAModule', 'PAConvCUDASAModuleMSG'

--- a/mmdet3d/ops/furthest_point_sample/points_sampler.py
+++ b/mmdet3d/ops/furthest_point_sample/points_sampler.py
@@ -36,13 +36,13 @@ class Points_Sampler(nn.Module):

    Args:
        num_point (list[int]): Number of sample points.
-        fps_mod_list (list[str]: Type of FPS method, valid mod
+        fps_mod_list (list[str], optional): Type of FPS method, valid mod
            ['F-FPS', 'D-FPS', 'FS'], Default: ['D-FPS'].
            F-FPS: using feature distances for FPS.
            D-FPS: using Euclidean distances of points for FPS.
            FS: using F-FPS and D-FPS simultaneously.
-        fps_sample_range_list (list[int]): Range of points to apply FPS.
-            Default: [-1].
+        fps_sample_range_list (list[int], optional):
+            Range of points to apply FPS. Default: [-1].
    """

    def __init__(self,

--- a/mmdet3d/ops/furthest_point_sample/utils.py
+++ b/mmdet3d/ops/furthest_point_sample/utils.py
@@ -7,7 +7,7 @@ def calc_square_dist(point_feat_a, point_feat_b, norm=True):
    Args:
        point_feat_a (Tensor): (B, N, C) Feature vector of each point.
        point_feat_b (Tensor): (B, M, C) Feature vector of each point.
-        norm (Bool): Whether to normalize the distance.
+        norm (bool, optional): Whether to normalize the distance.
            Default: True.

    Returns:

--- a/mmdet3d/ops/group_points/group_points.py
+++ b/mmdet3d/ops/group_points/group_points.py
@@ -14,22 +14,22 @@ class QueryAndGroup(nn.Module):
    Groups with a ball query of radius

    Args:
-        max_radius (float | None): The maximum radius of the balls.
+        max_radius (float): The maximum radius of the balls.
            If None is given, we will use kNN sampling instead of ball query.
        sample_num (int): Maximum number of features to gather in the ball.
-        min_radius (float): The minimum radius of the balls.
-        use_xyz (bool): Whether to use xyz.
+        min_radius (float, optional): The minimum radius of the balls.
+            Default: 0.
+        use_xyz (bool, optional): Whether to use xyz.
            Default: True.
-        return_grouped_xyz (bool): Whether to return grouped xyz.
+        return_grouped_xyz (bool, optional): Whether to return grouped xyz.
            Default: False.
-        normalize_xyz (bool): Whether to normalize xyz.
+        normalize_xyz (bool, optional): Whether to normalize xyz.
            Default: False.
-        uniform_sample (bool): Whether to sample uniformly.
+        uniform_sample (bool, optional): Whether to sample uniformly.
            Default: False.
-        return_unique_cnt (bool): Whether to return the count of
-            unique samples.
-            Default: False.
-        return_grouped_idx (bool): Whether to return grouped idx.
+        return_unique_cnt (bool, optional): Whether to return the count of
+            unique samples. Default: False.
+        return_grouped_idx (bool, optional): Whether to return grouped idx.
            Default: False.
    """


--- a/mmdet3d/ops/iou3d/iou3d_utils.py
+++ b/mmdet3d/ops/iou3d/iou3d_utils.py
@@ -4,7 +4,7 @@ from . import iou3d_cuda


 def boxes_iou_bev(boxes_a, boxes_b):
-    """Calculate boxes IoU in the bird view.
+    """Calculate boxes IoU in the Bird's Eye View.

    Args:
        boxes_a (torch.Tensor): Input boxes a with shape (M, 5).
@@ -22,24 +22,29 @@ def boxes_iou_bev(boxes_a, boxes_b):
    return ans_iou


-def nms_gpu(boxes, scores, thresh, pre_maxsize=None, post_max_size=None):
-    """Nms function with gpu implementation.
+def nms_gpu(boxes, scores, thresh, pre_max_size=None, post_max_size=None):
+    """NMS function GPU implementation (for BEV boxes). The overlap of two
+    boxes for IoU calculation is defined as the exact overlapping area of the
+    two boxes. In this function, one can also set `pre_max_size` and
+    `post_max_size`.

    Args:
        boxes (torch.Tensor): Input boxes with the shape of [N, 5]
            ([x1, y1, x2, y2, ry]).
        scores (torch.Tensor): Scores of boxes with the shape of [N].
        thresh (float): Threshold.
-        pre_maxsize (int): Max size of boxes before nms. Default: None.
-        post_maxsize (int): Max size of boxes after nms. Default: None.
+        pre_max_size (int, optional): Max size of boxes before NMS.
+            Default: None.
+        post_max_size (int, optional): Max size of boxes after NMS.
+            Default: None.

    Returns:
-        torch.Tensor: Indexes after nms.
+        torch.Tensor: Indexes after NMS.
    """
    order = scores.sort(0, descending=True)[1]

-    if pre_maxsize is not None:
-        order = order[:pre_maxsize]
+    if pre_max_size is not None:
+        order = order[:pre_max_size]
    boxes = boxes[order].contiguous()

    keep = torch.zeros(boxes.size(0), dtype=torch.long)
@@ -51,12 +56,14 @@ def nms_gpu(boxes, scores, thresh, pre_maxsize=None, post_max_size=None):


 def nms_normal_gpu(boxes, scores, thresh):
-    """Normal non maximum suppression on GPU.
+    """Normal NMS function GPU implementation (for BEV boxes). The overlap of
+    two boxes for IoU calculation is defined as the exact overlapping area of
+    the two boxes WITH their yaw angle set to 0.

    Args:
        boxes (torch.Tensor): Input boxes with shape (N, 5).
        scores (torch.Tensor): Scores of predicted boxes with shape (N).
-        thresh (torch.Tensor): Threshold of non maximum suppression.
+        thresh (float): Threshold of NMS.

    Returns:
        torch.Tensor: Remaining indices with scores in descending order.

--- a/mmdet3d/ops/pointnet_modules/paconv_sa_module.py
+++ b/mmdet3d/ops/pointnet_modules/paconv_sa_module.py
@@ -239,11 +239,12 @@ class PAConvCUDASAModuleMSG(BasePointSAModule):

        Args:
            points_xyz (Tensor): (B, N, 3) xyz coordinates of the features.
-            features (Tensor): (B, C, N) features of each point.
+            features (Tensor, optional): (B, C, N) features of each point.
                Default: None.
-            indices (Tensor): (B, num_point) Index of the features.
+            indices (Tensor, optional): (B, num_point) Index of the features.
+                Default: None.
+            target_xyz (Tensor, optional): (B, M, 3) new coords of the outputs.
                Default: None.
-            target_xyz (Tensor): (B, M, 3) new_xyz coordinates of the outputs.

        Returns:
            Tensor: (B, M, 3) where M is the number of points.

--- a/mmdet3d/ops/pointnet_modules/point_fp_module.py
+++ b/mmdet3d/ops/pointnet_modules/point_fp_module.py
@@ -15,7 +15,7 @@ class PointFPModule(BaseModule):

    Args:
        mlp_channels (list[int]): List of mlp channels.
-        norm_cfg (dict): Type of normalization method.
+        norm_cfg (dict, optional): Type of normalization method.
            Default: dict(type='BN2d').
    """


--- a/mmdet3d/ops/pointnet_modules/point_sa_module.py
+++ b/mmdet3d/ops/pointnet_modules/point_sa_module.py
@@ -18,25 +18,25 @@ class BasePointSAModule(nn.Module):
        sample_nums (list[int]): Number of samples in each ball query.
        mlp_channels (list[list[int]]): Specification of the pointnet before
            the global pooling for each scale.
-        fps_mod (list[str]: Type of FPS method, valid mod
+        fps_mod (list[str], optional): Type of FPS method, valid mod
            ['F-FPS', 'D-FPS', 'FS'], Default: ['D-FPS'].
            F-FPS: using feature distances for FPS.
            D-FPS: using Euclidean distances of points for FPS.
            FS: using F-FPS and D-FPS simultaneously.
-        fps_sample_range_list (list[int]): Range of points to apply FPS.
-            Default: [-1].
-        dilated_group (bool): Whether to use dilated ball query.
+        fps_sample_range_list (list[int], optional):
+            Range of points to apply FPS. Default: [-1].
+        dilated_group (bool, optional): Whether to use dilated ball query.
            Default: False.
-        use_xyz (bool): Whether to use xyz.
+        use_xyz (bool, optional): Whether to use xyz.
            Default: True.
-        pool_mod (str): Type of pooling method.
+        pool_mod (str, optional): Type of pooling method.
            Default: 'max_pool'.
-        normalize_xyz (bool): Whether to normalize local XYZ with radius.
-            Default: False.
-        grouper_return_grouped_xyz (bool): Whether to return grouped xyz in
-            `QueryAndGroup`. Defaults to False.
-        grouper_return_grouped_idx (bool): Whether to return grouped idx in
-            `QueryAndGroup`. Defaults to False.
+        normalize_xyz (bool, optional): Whether to normalize local XYZ
+            with radius. Default: False.
+        grouper_return_grouped_xyz (bool, optional): Whether to return
+            grouped xyz in `QueryAndGroup`. Default: False.
+        grouper_return_grouped_idx (bool, optional): Whether to return
+            grouped idx in `QueryAndGroup`. Default: False.
    """

    def __init__(self,
@@ -111,9 +111,7 @@ class BasePointSAModule(nn.Module):
        Args:
            points_xyz (Tensor): (B, N, 3) xyz coordinates of the features.
            features (Tensor): (B, C, N) features of each point.
-                Default: None.
            indices (Tensor): (B, num_point) Index of the features.
-                Default: None.
            target_xyz (Tensor): (B, M, 3) new_xyz coordinates of the outputs.

        Returns:
@@ -169,11 +167,12 @@ class BasePointSAModule(nn.Module):

        Args:
            points_xyz (Tensor): (B, N, 3) xyz coordinates of the features.
-            features (Tensor): (B, C, N) features of each point.
+            features (Tensor, optional): (B, C, N) features of each point.
                Default: None.
-            indices (Tensor): (B, num_point) Index of the features.
+            indices (Tensor, optional): (B, num_point) Index of the features.
+                Default: None.
+            target_xyz (Tensor, optional): (B, M, 3) new coords of the outputs.
                Default: None.
-            target_xyz (Tensor): (B, M, 3) new_xyz coordinates of the outputs.

        Returns:
            Tensor: (B, M, 3) where M is the number of points.
@@ -223,26 +222,26 @@ class PointSAModuleMSG(BasePointSAModule):
        sample_nums (list[int]): Number of samples in each ball query.
        mlp_channels (list[list[int]]): Specification of the pointnet before
            the global pooling for each scale.
-        fps_mod (list[str]: Type of FPS method, valid mod
+        fps_mod (list[str], optional): Type of FPS method, valid mod
            ['F-FPS', 'D-FPS', 'FS'], Default: ['D-FPS'].
            F-FPS: using feature distances for FPS.
            D-FPS: using Euclidean distances of points for FPS.
            FS: using F-FPS and D-FPS simultaneously.
-        fps_sample_range_list (list[int]): Range of points to apply FPS.
-            Default: [-1].
-        dilated_group (bool): Whether to use dilated ball query.
+        fps_sample_range_list (list[int], optional): Range of points to
+            apply FPS. Default: [-1].
+        dilated_group (bool, optional): Whether to use dilated ball query.
            Default: False.
-        norm_cfg (dict): Type of normalization method.
+        norm_cfg (dict, optional): Type of normalization method.
            Default: dict(type='BN2d').
-        use_xyz (bool): Whether to use xyz.
+        use_xyz (bool, optional): Whether to use xyz.
            Default: True.
-        pool_mod (str): Type of pooling method.
+        pool_mod (str, optional): Type of pooling method.
            Default: 'max_pool'.
-        normalize_xyz (bool): Whether to normalize local XYZ with radius.
-            Default: False.
-        bias (bool | str): If specified as `auto`, it will be decided by the
-            norm_cfg. Bias will be set as True if `norm_cfg` is None, otherwise
-            False. Default: "auto".
+        normalize_xyz (bool, optional): Whether to normalize local XYZ
+            with radius. Default: False.
+        bias (bool | str, optional): If specified as `auto`, it will be
+            decided by `norm_cfg`. `bias` will be set as True if
+            `norm_cfg` is None, otherwise False. Default: 'auto'.
    """

    def __init__(self,
@@ -298,24 +297,24 @@ class PointSAModule(PointSAModuleMSG):
    Args:
        mlp_channels (list[int]): Specification of the pointnet before
            the global pooling for each scale.
-        num_point (int): Number of points.
+        num_point (int, optional): Number of points.
            Default: None.
-        radius (float): Radius to group with.
+        radius (float, optional): Radius to group with.
            Default: None.
-        num_sample (int): Number of samples in each ball query.
+        num_sample (int, optional): Number of samples in each ball query.
            Default: None.
-        norm_cfg (dict): Type of normalization method.
+        norm_cfg (dict, optional): Type of normalization method.
            Default: dict(type='BN2d').
-        use_xyz (bool): Whether to use xyz.
+        use_xyz (bool, optional): Whether to use xyz.
            Default: True.
-        pool_mod (str): Type of pooling method.
+        pool_mod (str, optional): Type of pooling method.
            Default: 'max_pool'.
-        fps_mod (list[str]: Type of FPS method, valid mod
+        fps_mod (list[str], optional): Type of FPS method, valid mod
            ['F-FPS', 'D-FPS', 'FS'], Default: ['D-FPS'].
-        fps_sample_range_list (list[int]): Range of points to apply FPS.
-            Default: [-1].
-        normalize_xyz (bool): Whether to normalize local XYZ with radius.
-            Default: False.
+        fps_sample_range_list (list[int], optional): Range of points
+            to apply FPS. Default: [-1].
+        normalize_xyz (bool, optional): Whether to normalize local XYZ
+            with radius. Default: False.
    """

    def __init__(self,