Unverified commit 32a4328b authored by Wenwei Zhang, committed by GitHub

Bump version to V1.0.0rc0

parents 86cc487c a8817998
......@@ -83,15 +83,15 @@ class PointwiseSemanticHead(BaseModule):
sample.
Args:
voxel_centers (torch.Tensor): The center of voxels in shape \
voxel_centers (torch.Tensor): The center of voxels in shape
(voxel_num, 3).
gt_bboxes_3d (:obj:`BaseInstance3DBoxes`): Ground truth boxes in \
gt_bboxes_3d (:obj:`BaseInstance3DBoxes`): Ground truth boxes in
shape (box_num, 7).
gt_labels_3d (torch.Tensor): Class labels of ground truths in \
gt_labels_3d (torch.Tensor): Class labels of ground truths in
shape (box_num).
Returns:
tuple[torch.Tensor]: Segmentation targets with shape [voxel_num] \
tuple[torch.Tensor]: Segmentation targets with shape [voxel_num]
part prediction targets with shape [voxel_num, 3]
"""
gt_bboxes_3d = gt_bboxes_3d.to(voxel_centers.device)
......@@ -99,8 +99,8 @@ class PointwiseSemanticHead(BaseModule):
part_targets = voxel_centers.new_zeros((voxel_centers.shape[0], 3),
dtype=torch.float32)
box_idx = gt_bboxes_3d.points_in_boxes(voxel_centers)
enlarge_box_idx = enlarged_gt_boxes.points_in_boxes(
box_idx = gt_bboxes_3d.points_in_boxes_part(voxel_centers)
enlarge_box_idx = enlarged_gt_boxes.points_in_boxes_part(
voxel_centers).long()
gt_labels_pad = F.pad(
......@@ -131,19 +131,19 @@ class PointwiseSemanticHead(BaseModule):
"""generate segmentation and part prediction targets.
Args:
voxel_centers (torch.Tensor): The center of voxels in shape \
voxel_centers (torch.Tensor): The center of voxels in shape
(voxel_num, 3).
gt_bboxes_3d (:obj:`BaseInstance3DBoxes`): Ground truth boxes in \
gt_bboxes_3d (:obj:`BaseInstance3DBoxes`): Ground truth boxes in
shape (box_num, 7).
gt_labels_3d (torch.Tensor): Class labels of ground truths in \
gt_labels_3d (torch.Tensor): Class labels of ground truths in
shape (box_num).
Returns:
dict: Prediction targets
- seg_targets (torch.Tensor): Segmentation targets \
- seg_targets (torch.Tensor): Segmentation targets
with shape [voxel_num].
- part_targets (torch.Tensor): Part prediction targets \
- part_targets (torch.Tensor): Part prediction targets
with shape [voxel_num, 3].
"""
batch_size = len(gt_labels_3d)
......
......@@ -20,7 +20,7 @@ class PrimitiveHead(BaseModule):
num_dims (int): The dimension of primitive semantic information.
num_classes (int): The number of classes.
primitive_mode (str): The mode of primitive module,
avaliable mode ['z', 'xy', 'line'].
available mode ['z', 'xy', 'line'].
bbox_coder (:obj:`BaseBBoxCoder`): Bbox coder for encoding and
decoding boxes.
train_cfg (dict): Config for training.
......@@ -30,7 +30,7 @@ class PrimitiveHead(BaseModule):
feat_channels (tuple[int]): Convolution channels of
prediction layer.
upper_thresh (float): Threshold for line matching.
surface_thresh (float): Threshold for suface matching.
surface_thresh (float): Threshold for surface matching.
conv_cfg (dict): Config of convolution in prediction layer.
norm_cfg (dict): Config of BN in prediction layer.
objectness_loss (dict): Config of objectness loss.
......@@ -198,15 +198,15 @@ class PrimitiveHead(BaseModule):
Args:
bbox_preds (dict): Predictions from forward of primitive head.
points (list[torch.Tensor]): Input points.
gt_bboxes_3d (list[:obj:`BaseInstance3DBoxes`]): Ground truth \
gt_bboxes_3d (list[:obj:`BaseInstance3DBoxes`]): Ground truth
bboxes of each sample.
gt_labels_3d (list[torch.Tensor]): Labels of each sample.
pts_semantic_mask (None | list[torch.Tensor]): Point-wise
pts_semantic_mask (list[torch.Tensor]): Point-wise
semantic mask.
pts_instance_mask (None | list[torch.Tensor]): Point-wise
pts_instance_mask (list[torch.Tensor]): Point-wise
instance mask.
img_metas (list[dict]): Meta information of point clouds and images.
gt_bboxes_ignore (None | list[torch.Tensor]): Specify
gt_bboxes_ignore (list[torch.Tensor]): Specify
which bounding boxes can be ignored.
Returns:
......@@ -266,12 +266,12 @@ class PrimitiveHead(BaseModule):
Args:
points (list[torch.Tensor]): Points of each batch.
gt_bboxes_3d (list[:obj:`BaseInstance3DBoxes`]): Ground truth \
gt_bboxes_3d (list[:obj:`BaseInstance3DBoxes`]): Ground truth
bboxes of each batch.
gt_labels_3d (list[torch.Tensor]): Labels of each batch.
pts_semantic_mask (None | list[torch.Tensor]): Point-wise semantic
pts_semantic_mask (list[torch.Tensor]): Point-wise semantic
label of each batch.
pts_instance_mask (None | list[torch.Tensor]): Point-wise instance
pts_instance_mask (list[torch.Tensor]): Point-wise instance
label of each batch.
bbox_preds (dict): Predictions from forward of primitive head.
......@@ -333,12 +333,12 @@ class PrimitiveHead(BaseModule):
Args:
points (torch.Tensor): Points of each batch.
gt_bboxes_3d (:obj:`BaseInstance3DBoxes`): Ground truth \
gt_bboxes_3d (:obj:`BaseInstance3DBoxes`): Ground truth
boxes of each batch.
gt_labels_3d (torch.Tensor): Labels of each batch.
pts_semantic_mask (None | torch.Tensor): Point-wise semantic
pts_semantic_mask (torch.Tensor): Point-wise semantic
label of each batch.
pts_instance_mask (None | torch.Tensor): Point-wise instance
pts_instance_mask (torch.Tensor): Point-wise instance
label of each batch.
Returns:
......@@ -355,7 +355,7 @@ class PrimitiveHead(BaseModule):
# Generate pts_semantic_mask and pts_instance_mask when they are None
if pts_semantic_mask is None or pts_instance_mask is None:
points2box_mask = gt_bboxes_3d.points_in_boxes(points)
points2box_mask = gt_bboxes_3d.points_in_boxes_all(points)
assignment = points2box_mask.argmax(1)
background_mask = points2box_mask.max(1)[0] == 0
......
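The hunks above replace the old `points_in_boxes` calls with the renamed box-structure methods: `points_in_boxes_part` returns one box index per point (-1 for points outside every box), while `points_in_boxes_all` returns a per-point, per-box assignment matrix. A minimal usage sketch, assuming a CUDA device is available (the underlying kernels are GPU ops) and using made-up boxes and points:

import torch
from mmdet3d.core.bbox import LiDARInstance3DBoxes

# Two illustrative boxes (x, y, z, dx, dy, dz, yaw) and three query points.
boxes = LiDARInstance3DBoxes(
    torch.tensor([[0., 0., 0., 2., 2., 2., 0.],
                  [5., 5., 0., 2., 2., 2., 0.]]))
points = torch.rand(3, 3).cuda()
boxes = boxes.to(points.device)

box_idx = boxes.points_in_boxes_part(points)   # (3,), index of the box containing each point
box_mask = boxes.points_in_boxes_all(points)   # (3, 2), one column per box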
# Copyright (c) OpenMMLab. All rights reserved.
import warnings
from torch.nn import functional as F
from mmdet3d.core import AssignResult
......
# Copyright (c) OpenMMLab. All rights reserved.
import torch
from torch.nn import functional as F
from mmdet3d.core import AssignResult
from mmdet3d.core.bbox import bbox3d2result, bbox3d2roi
from mmdet.core import build_assigner, build_sampler
from mmdet.models import HEADS
from ..builder import build_head, build_roi_extractor
from .base_3droi_head import Base3DRoIHead
@HEADS.register_module()
class PointRCNNRoIHead(Base3DRoIHead):
"""RoI head for PointRCNN.
Args:
bbox_head (dict): Config of bbox_head.
point_roi_extractor (dict): Config of RoI extractor.
train_cfg (dict): Train configs.
test_cfg (dict): Test configs.
depth_normalizer (float, optional): Normalize depth feature.
Defaults to 70.0.
init_cfg (dict, optional): Config of initialization. Defaults to None.
"""
def __init__(self,
bbox_head,
point_roi_extractor,
train_cfg,
test_cfg,
depth_normalizer=70.0,
pretrained=None,
init_cfg=None):
super(PointRCNNRoIHead, self).__init__(
bbox_head=bbox_head,
train_cfg=train_cfg,
test_cfg=test_cfg,
pretrained=pretrained,
init_cfg=init_cfg)
self.depth_normalizer = depth_normalizer
if point_roi_extractor is not None:
self.point_roi_extractor = build_roi_extractor(point_roi_extractor)
self.init_assigner_sampler()
def init_bbox_head(self, bbox_head):
"""Initialize box head.
Args:
bbox_head (dict): Config dict of RoI Head.
"""
self.bbox_head = build_head(bbox_head)
def init_mask_head(self):
"""Initialize maek head."""
pass
def init_assigner_sampler(self):
"""Initialize assigner and sampler."""
self.bbox_assigner = None
self.bbox_sampler = None
if self.train_cfg:
if isinstance(self.train_cfg.assigner, dict):
self.bbox_assigner = build_assigner(self.train_cfg.assigner)
elif isinstance(self.train_cfg.assigner, list):
self.bbox_assigner = [
build_assigner(res) for res in self.train_cfg.assigner
]
self.bbox_sampler = build_sampler(self.train_cfg.sampler)
def forward_train(self, feats_dict, input_metas, proposal_list,
gt_bboxes_3d, gt_labels_3d):
"""Training forward function of PointRCNNRoIHead.
Args:
feats_dict (dict): Contains features from the first stage.
input_metas (list[dict]): Meta info of each input.
proposal_list (list[dict]): Proposal information from rpn.
The dictionary should contain the following keys:
- boxes_3d (:obj:`BaseInstance3DBoxes`): Proposal bboxes
- labels_3d (torch.Tensor): Labels of proposals
gt_bboxes_3d (list[:obj:`BaseInstance3DBoxes`]):
GT bboxes of each sample. The bboxes are encapsulated
by 3D box structures.
gt_labels_3d (list[LongTensor]): GT labels of each sample.
Returns:
dict: Losses from RoI RCNN head.
- loss_bbox (torch.Tensor): Loss of bboxes
"""
features = feats_dict['features']
points = feats_dict['points']
point_cls_preds = feats_dict['points_cls_preds']
sem_scores = point_cls_preds.sigmoid()
point_scores = sem_scores.max(-1)[0]
sample_results = self._assign_and_sample(proposal_list, gt_bboxes_3d,
gt_labels_3d)
# concat the depth, semantic features and backbone features
features = features.transpose(1, 2).contiguous()
point_depths = points.norm(dim=2) / self.depth_normalizer - 0.5
features_list = [
point_scores.unsqueeze(2),
point_depths.unsqueeze(2), features
]
features = torch.cat(features_list, dim=2)
bbox_results = self._bbox_forward_train(features, points,
sample_results)
losses = dict()
losses.update(bbox_results['loss_bbox'])
return losses
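The concatenation in `forward_train` fuses per-point semantic scores and normalized depth with the backbone features before RoI pooling. A shape-only sketch of that step (dummy tensors and placeholder sizes, not the real pipeline):

import torch

B, N, C = 2, 1024, 128
features = torch.rand(B, C, N).transpose(1, 2).contiguous()   # -> (B, N, C)
points = torch.rand(B, N, 3)
point_scores = torch.rand(B, N)                  # max class score per point
point_depths = points.norm(dim=2) / 70.0 - 0.5   # depth_normalizer = 70.0

fused = torch.cat([point_scores.unsqueeze(2),
                   point_depths.unsqueeze(2),
                   features], dim=2)
assert fused.shape == (B, N, C + 2)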
def simple_test(self, feats_dict, img_metas, proposal_list, **kwargs):
"""Simple testing forward function of PointRCNNRoIHead.
Note:
This function assumes that the batch size is 1
Args:
feats_dict (dict): Contains features from the first stage.
img_metas (list[dict]): Meta info of each image.
proposal_list (list[dict]): Proposal information from rpn.
Returns:
dict: Bbox results of one frame.
"""
rois = bbox3d2roi([res['boxes_3d'].tensor for res in proposal_list])
labels_3d = [res['labels_3d'] for res in proposal_list]
features = feats_dict['features']
points = feats_dict['points']
point_cls_preds = feats_dict['points_cls_preds']
sem_scores = point_cls_preds.sigmoid()
point_scores = sem_scores.max(-1)[0]
features = features.transpose(1, 2).contiguous()
point_depths = points.norm(dim=2) / self.depth_normalizer - 0.5
features_list = [
point_scores.unsqueeze(2),
point_depths.unsqueeze(2), features
]
features = torch.cat(features_list, dim=2)
batch_size = features.shape[0]
bbox_results = self._bbox_forward(features, points, batch_size, rois)
object_score = bbox_results['cls_score'].sigmoid()
bbox_list = self.bbox_head.get_bboxes(
rois,
object_score,
bbox_results['bbox_pred'],
labels_3d,
img_metas,
cfg=self.test_cfg)
bbox_results = [
bbox3d2result(bboxes, scores, labels)
for bboxes, scores, labels in bbox_list
]
return bbox_results
def _bbox_forward_train(self, features, points, sampling_results):
"""Forward training function of roi_extractor and bbox_head.
Args:
features (torch.Tensor): Backbone features with depth and \
semantic features.
points (torch.Tensor): Pointcloud.
sampling_results (:obj:`SamplingResult`): Sampled results used
for training.
Returns:
dict: Forward results including losses and predictions.
"""
rois = bbox3d2roi([res.bboxes for res in sampling_results])
batch_size = features.shape[0]
bbox_results = self._bbox_forward(features, points, batch_size, rois)
bbox_targets = self.bbox_head.get_targets(sampling_results,
self.train_cfg)
loss_bbox = self.bbox_head.loss(bbox_results['cls_score'],
bbox_results['bbox_pred'], rois,
*bbox_targets)
bbox_results.update(loss_bbox=loss_bbox)
return bbox_results
def _bbox_forward(self, features, points, batch_size, rois):
"""Forward function of roi_extractor and bbox_head used in both
training and testing.
Args:
features (torch.Tensor): Backbone features with depth and
semantic features.
points (torch.Tensor): Pointcloud.
batch_size (int): Batch size.
rois (torch.Tensor): RoI boxes.
Returns:
dict: Contains predictions of bbox_head and
features of roi_extractor.
"""
pooled_point_feats = self.point_roi_extractor(features, points,
batch_size, rois)
cls_score, bbox_pred = self.bbox_head(pooled_point_feats)
bbox_results = dict(cls_score=cls_score, bbox_pred=bbox_pred)
return bbox_results
def _assign_and_sample(self, proposal_list, gt_bboxes_3d, gt_labels_3d):
"""Assign and sample proposals for training.
Args:
proposal_list (list[dict]): Proposals produced by RPN.
gt_bboxes_3d (list[:obj:`BaseInstance3DBoxes`]): Ground truth
boxes.
gt_labels_3d (list[torch.Tensor]): Ground truth labels
Returns:
list[:obj:`SamplingResult`]: Sampled results of each training
sample.
"""
sampling_results = []
# bbox assign
for batch_idx in range(len(proposal_list)):
cur_proposal_list = proposal_list[batch_idx]
cur_boxes = cur_proposal_list['boxes_3d']
cur_labels_3d = cur_proposal_list['labels_3d']
cur_gt_bboxes = gt_bboxes_3d[batch_idx].to(cur_boxes.device)
cur_gt_labels = gt_labels_3d[batch_idx]
batch_num_gts = 0
# 0 is bg
batch_gt_indis = cur_gt_labels.new_full((len(cur_boxes), ), 0)
batch_max_overlaps = cur_boxes.tensor.new_zeros(len(cur_boxes))
# -1 is bg
batch_gt_labels = cur_gt_labels.new_full((len(cur_boxes), ), -1)
# each class may have its own assigner
if isinstance(self.bbox_assigner, list):
for i, assigner in enumerate(self.bbox_assigner):
gt_per_cls = (cur_gt_labels == i)
pred_per_cls = (cur_labels_3d == i)
cur_assign_res = assigner.assign(
cur_boxes.tensor[pred_per_cls],
cur_gt_bboxes.tensor[gt_per_cls],
gt_labels=cur_gt_labels[gt_per_cls])
# gather assign_results in different class into one result
batch_num_gts += cur_assign_res.num_gts
# gt inds (1-based)
gt_inds_arange_pad = gt_per_cls.nonzero(
as_tuple=False).view(-1) + 1
# pad 0 for unassigned indices
gt_inds_arange_pad = F.pad(
gt_inds_arange_pad, (1, 0), mode='constant', value=0)
# pad -1 for ignored indices
gt_inds_arange_pad = F.pad(
gt_inds_arange_pad, (1, 0), mode='constant', value=-1)
# convert to 0~gt_num+2 for indices
gt_inds_arange_pad += 1
# now 0 is bg, >1 is fg in batch_gt_indis
batch_gt_indis[pred_per_cls] = gt_inds_arange_pad[
cur_assign_res.gt_inds + 1] - 1
batch_max_overlaps[
pred_per_cls] = cur_assign_res.max_overlaps
batch_gt_labels[pred_per_cls] = cur_assign_res.labels
assign_result = AssignResult(batch_num_gts, batch_gt_indis,
batch_max_overlaps,
batch_gt_labels)
else: # for single class
assign_result = self.bbox_assigner.assign(
cur_boxes.tensor,
cur_gt_bboxes.tensor,
gt_labels=cur_gt_labels)
# sample boxes
sampling_result = self.bbox_sampler.sample(assign_result,
cur_boxes.tensor,
cur_gt_bboxes.tensor,
cur_gt_labels)
sampling_results.append(sampling_result)
return sampling_results
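The per-class branch of `_assign_and_sample` merges class-wise assigner results back into a single per-sample tensor via the padded index trick commented above. A toy worked example of that remapping (numbers chosen purely for illustration):

import torch
import torch.nn.functional as F

# Four GTs in the sample; GTs 1 and 3 (0-based) belong to the current class.
gt_per_cls = torch.tensor([False, True, False, True])

gt_inds_arange_pad = gt_per_cls.nonzero(as_tuple=False).view(-1) + 1   # [2, 4]
gt_inds_arange_pad = F.pad(gt_inds_arange_pad, (1, 0), value=0)        # [0, 2, 4]
gt_inds_arange_pad = F.pad(gt_inds_arange_pad, (1, 0), value=-1)       # [-1, 0, 2, 4]
gt_inds_arange_pad += 1                                                 # [0, 1, 3, 5]

# Assigner output per prediction of this class: -1 ignore, 0 background,
# k >= 1 means the k-th GT *of this class*.
assigner_gt_inds = torch.tensor([-1, 0, 1, 2])
global_inds = gt_inds_arange_pad[assigner_gt_inds + 1] - 1
# -> [-1, 0, 2, 4]: ignore stays -1, background stays 0, and the class-local
#    GT indices are mapped back to 1-based indices over all four GTs.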
# Copyright (c) OpenMMLab. All rights reserved.
from mmdet.models.roi_heads.roi_extractors import SingleRoIExtractor
from .single_roiaware_extractor import Single3DRoIAwareExtractor
from .single_roipoint_extractor import Single3DRoIPointExtractor
__all__ = ['SingleRoIExtractor', 'Single3DRoIAwareExtractor']
__all__ = [
'SingleRoIExtractor', 'Single3DRoIAwareExtractor',
'Single3DRoIPointExtractor'
]
# Copyright (c) OpenMMLab. All rights reserved.
import torch
from torch import nn as nn
from mmdet3d import ops
from mmdet3d.core.bbox.structures import rotation_3d_in_axis
from mmdet.models.builder import ROI_EXTRACTORS
@ROI_EXTRACTORS.register_module()
class Single3DRoIPointExtractor(nn.Module):
"""Point-wise roi-aware Extractor.
Extract Point-wise roi features.
Args:
roi_layer (dict): The config of roi layer.
"""
def __init__(self, roi_layer=None):
super(Single3DRoIPointExtractor, self).__init__()
self.roi_layer = self.build_roi_layers(roi_layer)
def build_roi_layers(self, layer_cfg):
"""Build roi layers using `layer_cfg`"""
cfg = layer_cfg.copy()
layer_type = cfg.pop('type')
assert hasattr(ops, layer_type)
layer_cls = getattr(ops, layer_type)
roi_layers = layer_cls(**cfg)
return roi_layers
def forward(self, feats, coordinate, batch_inds, rois):
"""Extract point-wise roi features.
Args:
feats (torch.FloatTensor): Point-wise features with
shape (batch, npoints, channels) for pooling.
coordinate (torch.FloatTensor): Coordinate of each point.
batch_inds (int): Batch size used to reshape the RoIs.
rois (torch.FloatTensor): Roi boxes with batch indices.
Returns:
torch.FloatTensor: Pooled features
"""
rois = rois[..., 1:]
rois = rois.view(batch_inds, -1, rois.shape[-1])
with torch.no_grad():
pooled_roi_feat, pooled_empty_flag = self.roi_layer(
coordinate, feats, rois)
# canonical transformation
roi_center = rois[:, :, 0:3]
pooled_roi_feat[:, :, :, 0:3] -= roi_center.unsqueeze(dim=2)
pooled_roi_feat = pooled_roi_feat.view(-1,
pooled_roi_feat.shape[-2],
pooled_roi_feat.shape[-1])
pooled_roi_feat[:, :, 0:3] = rotation_3d_in_axis(
pooled_roi_feat[:, :, 0:3],
-(rois.view(-1, rois.shape[-1])[:, 6]),
axis=2)
pooled_roi_feat[pooled_empty_flag.view(-1) > 0] = 0
return pooled_roi_feat
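A hedged construction sketch for this extractor; the `roi_layer` config mirrors the PointRCNN setup elsewhere in the repo and assumes the `RoIPointPool3d` op registered in `mmdet3d.ops` keeps its `num_sampled_points` argument:

from mmdet3d.models.roi_heads.roi_extractors import Single3DRoIPointExtractor

# Hypothetical config; RoIPointPool3d pools a fixed number of points per RoI.
extractor = Single3DRoIPointExtractor(
    roi_layer=dict(type='RoIPointPool3d', num_sampled_points=512))
# forward(feats, coordinate, batch_inds, rois) pools the sampled points of each
# RoI and canonically transforms their xyz into the box-centered frame.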
# Copyright (c) OpenMMLab. All rights reserved.
from os import path as osp
import mmcv
import numpy as np
import torch
from mmcv.parallel import DataContainer as DC
from mmcv.runner import auto_fp16
from os import path as osp
from mmdet3d.core import show_seg_result
from mmseg.models.segmentors import BaseSegmentor
......@@ -80,7 +81,7 @@ class Base3DSegmentor(BaseSegmentor):
Args:
data (list[dict]): Input points and the information of the sample.
result (list[dict]): Prediction results.
palette (list[list[int]]] | np.ndarray | None): The palette of
palette (list[list[int]] | np.ndarray): The palette of
segmentation map. If None is given, random palette will be
generated. Default: None
out_dir (str): Output directory of visualization result.
......
......@@ -187,7 +187,7 @@ class EncoderDecoder3D(Base3DSegmentor):
use_normalized_coord=False):
"""Generating model input.
Generate input by subtracting patch center and adding additional \
Generate input by subtracting patch center and adding additional
features. Currently support colors and normalized xyz as features.
Args:
......@@ -195,7 +195,7 @@ class EncoderDecoder3D(Base3DSegmentor):
patch_center (torch.Tensor): Center coordinate of the patch.
coord_max (torch.Tensor): Max coordinate of all 3D points.
feats (torch.Tensor): Features of sampled points of shape [S, C].
use_normalized_coord (bool, optional): Whether to use normalized \
use_normalized_coord (bool, optional): Whether to use normalized
xyz as additional features. Defaults to False.
Returns:
......@@ -233,17 +233,17 @@ class EncoderDecoder3D(Base3DSegmentor):
block_size (float, optional): Size of a patch to sample.
sample_rate (float, optional): Stride used in sliding patch.
Defaults to 0.5.
use_normalized_coord (bool, optional): Whether to use normalized \
use_normalized_coord (bool, optional): Whether to use normalized
xyz as additional features. Defaults to False.
eps (float, optional): A value added to patch boundary to guarantee
points coverage. Default 1e-3.
points coverage. Defaults to 1e-3.
Returns:
np.ndarray | np.ndarray:
- patch_points (torch.Tensor): Points of different patches of \
- patch_points (torch.Tensor): Points of different patches of
shape [K, N, 3+C].
- patch_idxs (torch.Tensor): Index of each point in \
- patch_idxs (torch.Tensor): Index of each point in
`patch_points`, of shape [K, N].
"""
device = points.device
......
# Copyright (c) OpenMMLab. All rights reserved.
from .clip_sigmoid import clip_sigmoid
from .edge_indices import get_edge_indices
from .gen_keypoints import get_keypoints
from .handle_objs import filter_outside_objs, handle_proj_objs
from .mlp import MLP
__all__ = ['clip_sigmoid', 'MLP']
__all__ = [
'clip_sigmoid', 'MLP', 'get_edge_indices', 'filter_outside_objs',
'handle_proj_objs', 'get_keypoints'
]
......@@ -7,8 +7,8 @@ def clip_sigmoid(x, eps=1e-4):
Args:
x (torch.Tensor): Input feature map with the shape of [B, N, H, W].
eps (float): Lower bound of the range to be clamped to. Defaults
to 1e-4.
eps (float, optional): Lower bound of the range to be clamped to.
Defaults to 1e-4.
Returns:
torch.Tensor: Feature map after sigmoid.
......
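For reference, a minimal sketch consistent with the `clip_sigmoid` docstring above (a sigmoid clamped away from 0 and 1 so that downstream losses such as a Gaussian focal loss stay numerically stable); the exact in-repo body may differ:

import torch

def clip_sigmoid_sketch(x, eps=1e-4):
    """Sigmoid clamped to [eps, 1 - eps] to avoid log(0) in later losses."""
    return torch.clamp(x.sigmoid(), min=eps, max=1 - eps)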
# Copyright (c) OpenMMLab. All rights reserved.
import numpy as np
import torch
def get_edge_indices(img_metas,
downsample_ratio,
step=1,
pad_mode='default',
dtype=np.float32,
device='cpu'):
"""Function to filter the objects label outside the image.
The edge_indices are generated using numpy on cpu rather
than on CUDA due to the latency issue. When batch size = 8,
this function with numpy array is ~8 times faster than that
with CUDA tensor (0.09s and 0.72s in 100 runs).
Args:
img_metas (list[dict]): Meta information of each image, e.g.,
image size, scaling factor, etc.
downsample_ratio (int): Downsample ratio of output feature.
step (int, optional): Step size used for generating
edge indices. Default: 1.
pad_mode (str, optional): Padding mode during data pipeline.
Default: 'default'.
dtype (np.dtype, optional): Dtype of the edge indices array.
Default: np.float32.
device (str, optional): Device of edge indices tensor.
Default: 'cpu'.
Returns:
list[Tensor]: Edge indices for each image in batch data.
"""
edge_indices_list = []
for i in range(len(img_metas)):
img_shape = img_metas[i]['img_shape']
pad_shape = img_metas[i]['pad_shape']
h, w = img_shape[:2]
pad_h, pad_w = pad_shape
edge_indices = []
if pad_mode == 'default':
x_min = 0
y_min = 0
x_max = (w - 1) // downsample_ratio
y_max = (h - 1) // downsample_ratio
elif pad_mode == 'center':
x_min = np.ceil((pad_w - w) / 2 * downsample_ratio)
y_min = np.ceil((pad_h - h) / 2 * downsample_ratio)
x_max = x_min + w // downsample_ratio
y_max = y_min + h // downsample_ratio
else:
raise NotImplementedError
# left
y = np.arange(y_min, y_max, step, dtype=dtype)
x = np.ones(len(y)) * x_min
edge_indices_edge = np.stack((x, y), axis=1)
edge_indices.append(edge_indices_edge)
# bottom
x = np.arange(x_min, x_max, step, dtype=dtype)
y = np.ones(len(x)) * y_max
edge_indices_edge = np.stack((x, y), axis=1)
edge_indices.append(edge_indices_edge)
# right
y = np.arange(y_max, y_min, -step, dtype=dtype)
x = np.ones(len(y)) * x_max
edge_indices_edge = np.stack((x, y), axis=1)
edge_indices.append(edge_indices_edge)
# top
x = np.arange(x_max, x_min, -step, dtype=dtype)
y = np.ones(len(x)) * y_min
edge_indices_edge = np.stack((x, y), axis=1)
edge_indices.append(edge_indices_edge)
edge_indices = \
np.concatenate([index for index in edge_indices], axis=0)
edge_indices = torch.from_numpy(edge_indices).to(device).long()
edge_indices_list.append(edge_indices)
return edge_indices_list
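A hedged usage sketch of `get_edge_indices` with the default padding mode, assuming the function above is in scope; the `img_metas` values are made up for illustration (note the function unpacks `pad_shape` as a 2-tuple):

# Hypothetical meta info for a single image.
img_metas = [dict(img_shape=(375, 1242, 3), pad_shape=(384, 1248))]
edge_indices_list = get_edge_indices(img_metas, downsample_ratio=4)
# One LongTensor of (x, y) border indices per image, traced left -> bottom ->
# right -> top on the downsampled feature map.
print(edge_indices_list[0].shape)   # (num_edge_points, 2)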
# Copyright (c) OpenMMLab. All rights reserved.
import torch
from mmdet3d.core.bbox import points_cam2img
def get_keypoints(gt_bboxes_3d_list,
centers2d_list,
img_metas,
use_local_coords=True):
"""Function to filter the objects label outside the image.
Args:
gt_bboxes_3d_list (list[:obj:`BaseInstance3DBoxes`]): 3D ground truth
bboxes of each image.
centers2d_list (list[Tensor]): Projected 3D centers onto 2D image,
shape (num_gt, 2).
img_metas (list[dict]): Meta information of each image, e.g.,
image size, scaling factor, etc.
use_local_coords (bool, optional): Whether to use local coordinates
for keypoints. Default: True.
Returns:
tuple[list[Tensor]]: It contains two elements, the first is the
keypoints for each projected 2D bbox in batch data. The second is
the visible mask of depth calculated by keypoints.
"""
assert len(gt_bboxes_3d_list) == len(centers2d_list)
bs = len(gt_bboxes_3d_list)
keypoints2d_list = []
keypoints_depth_mask_list = []
for i in range(bs):
gt_bboxes_3d = gt_bboxes_3d_list[i]
centers2d = centers2d_list[i]
img_shape = img_metas[i]['img_shape']
cam2img = img_metas[i]['cam2img']
h, w = img_shape[:2]
# (N, 8, 3)
corners3d = gt_bboxes_3d.corners
top_centers3d = torch.mean(corners3d[:, [0, 1, 4, 5], :], dim=1)
bot_centers3d = torch.mean(corners3d[:, [2, 3, 6, 7], :], dim=1)
# (N, 2, 3)
top_bot_centers3d = torch.stack((top_centers3d, bot_centers3d), dim=1)
keypoints3d = torch.cat((corners3d, top_bot_centers3d), dim=1)
# (N, 10, 2)
keypoints2d = points_cam2img(keypoints3d, cam2img)
# keypoints mask: keypoints must be inside
# the image and in front of the camera
keypoints_x_visible = (keypoints2d[..., 0] >= 0) & (
keypoints2d[..., 0] <= w - 1)
keypoints_y_visible = (keypoints2d[..., 1] >= 0) & (
keypoints2d[..., 1] <= h - 1)
keypoints_z_visible = (keypoints3d[..., -1] > 0)
# (N, 10)
keypoints_visible = keypoints_x_visible & \
keypoints_y_visible & keypoints_z_visible
# center, diag-02, diag-13
keypoints_depth_valid = torch.stack(
(keypoints_visible[:, [8, 9]].all(dim=1),
keypoints_visible[:, [0, 3, 5, 6]].all(dim=1),
keypoints_visible[:, [1, 2, 4, 7]].all(dim=1)),
dim=1)
keypoints_visible = keypoints_visible.float()
if use_local_coords:
keypoints2d = torch.cat((keypoints2d - centers2d.unsqueeze(1),
keypoints_visible.unsqueeze(-1)),
dim=2)
else:
keypoints2d = torch.cat(
(keypoints2d, keypoints_visible.unsqueeze(-1)), dim=2)
keypoints2d_list.append(keypoints2d)
keypoints_depth_mask_list.append(keypoints_depth_valid)
return (keypoints2d_list, keypoints_depth_mask_list)
# Copyright (c) OpenMMLab. All rights reserved.
import torch
def filter_outside_objs(gt_bboxes_list, gt_labels_list, gt_bboxes_3d_list,
gt_labels_3d_list, centers2d_list, img_metas):
"""Function to filter the objects label outside the image.
Args:
gt_bboxes_list (list[Tensor]): Ground truth bboxes of each image,
each has shape (num_gt, 4).
gt_labels_list (list[Tensor]): Ground truth labels of each box,
each has shape (num_gt,).
gt_bboxes_3d_list (list[Tensor]): 3D Ground truth bboxes of each
image, each has shape (num_gt, bbox_code_size).
gt_labels_3d_list (list[Tensor]): 3D Ground truth labels of each
box, each has shape (num_gt,).
centers2d_list (list[Tensor]): Projected 3D centers onto 2D image,
each has shape (num_gt, 2).
img_metas (list[dict]): Meta information of each image, e.g.,
image size, scaling factor, etc.
"""
bs = len(centers2d_list)
for i in range(bs):
centers2d = centers2d_list[i].clone()
img_shape = img_metas[i]['img_shape']
keep_inds = (centers2d[:, 0] > 0) & \
(centers2d[:, 0] < img_shape[1]) & \
(centers2d[:, 1] > 0) & \
(centers2d[:, 1] < img_shape[0])
centers2d_list[i] = centers2d[keep_inds]
gt_labels_list[i] = gt_labels_list[i][keep_inds]
gt_bboxes_list[i] = gt_bboxes_list[i][keep_inds]
gt_bboxes_3d_list[i].tensor = gt_bboxes_3d_list[i].tensor[keep_inds]
gt_labels_3d_list[i] = gt_labels_3d_list[i][keep_inds]
def get_centers2d_target(centers2d, centers, img_shape):
"""Function to get target centers2d.
Args:
centers2d (Tensor): Projected 3D centers onto 2D images.
centers (Tensor): Centers of 2d gt bboxes.
img_shape (tuple): Resized image shape.
Returns:
torch.Tensor: Projected 3D centers (centers2D) target.
"""
N = centers2d.shape[0]
h, w = img_shape[:2]
valid_intersects = centers2d.new_zeros((N, 2))
a = (centers[:, 1] - centers2d[:, 1]) / (centers[:, 0] - centers2d[:, 0])
b = centers[:, 1] - a * centers[:, 0]
left_y = b
right_y = (w - 1) * a + b
top_x = -b / a
bottom_x = (h - 1 - b) / a
left_coors = torch.stack((left_y.new_zeros(N, ), left_y), dim=1)
right_coors = torch.stack((right_y.new_full((N, ), w - 1), right_y), dim=1)
top_coors = torch.stack((top_x, top_x.new_zeros(N, )), dim=1)
bottom_coors = torch.stack((bottom_x, bottom_x.new_full((N, ), h - 1)),
dim=1)
intersects = torch.stack(
[left_coors, right_coors, top_coors, bottom_coors], dim=1)
intersects_x = intersects[:, :, 0]
intersects_y = intersects[:, :, 1]
inds = (intersects_x >= 0) & (intersects_x <=
w - 1) & (intersects_y >= 0) & (
intersects_y <= h - 1)
valid_intersects = intersects[inds].reshape(N, 2, 2)
dist = torch.norm(valid_intersects - centers2d.unsqueeze(1), dim=2)
min_idx = torch.argmin(dist, dim=1)
min_idx = min_idx.unsqueeze(-1).unsqueeze(-1).expand(-1, -1, 2)
centers2d_target = valid_intersects.gather(dim=1, index=min_idx).squeeze(1)
return centers2d_target
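A toy worked example of `get_centers2d_target`: a projected 3D center lying outside the image is pulled back to the border along the line towards the 2D box center, picking the nearer of the two valid border intersections (values are illustrative only):

import torch

# One projected center just left of a 100 x 200 (h, w) image.
centers2d = torch.tensor([[-10.0, 40.0]])   # outside the image
centers = torch.tensor([[50.0, 70.0]])      # 2D bbox center, inside the image
target = get_centers2d_target(centers2d, centers, img_shape=(100, 200, 3))
# The center is moved to the left-border intersection of the joining line,
# i.e. approximately (0.0, 45.0).
print(target)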
def handle_proj_objs(centers2d_list, gt_bboxes_list, img_metas):
"""Function to handle projected object centers2d, generate target
centers2d.
Args:
gt_bboxes_list (list[Tensor]): Ground truth bboxes of each image,
shape (num_gt, 4).
centers2d_list (list[Tensor]): Projected 3D centers onto 2D image,
shape (num_gt, 2).
img_metas (list[dict]): Meta information of each image, e.g.,
image size, scaling factor, etc.
Returns:
tuple[list[Tensor]]: It contains three elements. The first is the
target centers2d after handling the truncated objects. The second
is the offset between each target center2d and its rounded integer
counterpart, and the last is the truncation mask for each object in the
batch data.
"""
bs = len(centers2d_list)
centers2d_target_list = []
trunc_mask_list = []
offsets2d_list = []
# For now, only the pad mode where the image is padded on the right and
# bottom sides is supported.
for i in range(bs):
centers2d = centers2d_list[i]
gt_bbox = gt_bboxes_list[i]
img_shape = img_metas[i]['img_shape']
centers2d_target = centers2d.clone()
inside_inds = (centers2d[:, 0] > 0) & \
(centers2d[:, 0] < img_shape[1]) & \
(centers2d[:, 1] > 0) & \
(centers2d[:, 1] < img_shape[0])
outside_inds = ~inside_inds
# if there are outside objects
if outside_inds.any():
centers = (gt_bbox[:, :2] + gt_bbox[:, 2:]) / 2
outside_centers2d = centers2d[outside_inds]
match_centers = centers[outside_inds]
target_outside_centers2d = get_centers2d_target(
outside_centers2d, match_centers, img_shape)
centers2d_target[outside_inds] = target_outside_centers2d
offsets2d = centers2d - centers2d_target.round().int()
trunc_mask = outside_inds
centers2d_target_list.append(centers2d_target)
trunc_mask_list.append(trunc_mask)
offsets2d_list.append(offsets2d)
return (centers2d_target_list, offsets2d_list, trunc_mask_list)
......@@ -10,15 +10,15 @@ class MLP(BaseModule):
Pass features (B, C, N) through an MLP.
Args:
in_channels (int): Number of channels of input features.
in_channels (int, optional): Number of channels of input features.
Default: 18.
conv_channels (tuple[int]): Out channels of the convolution.
conv_channels (tuple[int], optional): Out channels of the convolution.
Default: (256, 256).
conv_cfg (dict): Config of convolution.
conv_cfg (dict, optional): Config of convolution.
Default: dict(type='Conv1d').
norm_cfg (dict): Config of normalization.
norm_cfg (dict, optional): Config of normalization.
Default: dict(type='BN1d').
act_cfg (dict): Config of activation.
act_cfg (dict, optional): Config of activation.
Default: dict(type='ReLU').
"""
......
......@@ -15,7 +15,6 @@ class PillarFeatureNet(nn.Module):
The network prepares the pillar features and performs forward pass
through PFNLayers.
Args:
in_channels (int, optional): Number of input features,
either x, y, z or x, y, z, r. Defaults to 4.
......@@ -33,7 +32,7 @@ class PillarFeatureNet(nn.Module):
Defaults to dict(type='BN1d', eps=1e-3, momentum=0.01).
mode (str, optional): The mode to gather point features. Options are
'max' or 'avg'. Defaults to 'max'.
legacy (bool): Whether to use the new behavior or
legacy (bool, optional): Whether to use the new behavior or
the original behavior. Defaults to True.
"""
......@@ -54,7 +53,7 @@ class PillarFeatureNet(nn.Module):
if with_cluster_center:
in_channels += 3
if with_voxel_center:
in_channels += 2
in_channels += 3
if with_distance:
in_channels += 1
self._with_distance = with_distance
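With this change, enabling `with_voxel_center` adds three offset channels (x, y and z offsets to the voxel center) instead of two, so the effective input width of the first PFN layer grows accordingly. A quick arithmetic sketch, assuming raw 4-dim points (x, y, z, reflectance) and all optional features enabled:

# in_channels bookkeeping after this change (illustrative):
in_channels = 4           # x, y, z, reflectance
in_channels += 3          # with_cluster_center: offsets to the cluster mean
in_channels += 3          # with_voxel_center: offsets to the voxel center (now x, y AND z)
in_channels += 1          # with_distance: L2 norm of each point
assert in_channels == 11  # previously 10, when the voxel-center offset was 2-D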
......@@ -84,8 +83,10 @@ class PillarFeatureNet(nn.Module):
# Need pillar (voxel) size and x/y offset in order to calculate offset
self.vx = voxel_size[0]
self.vy = voxel_size[1]
self.vz = voxel_size[2]
self.x_offset = self.vx / 2 + point_cloud_range[0]
self.y_offset = self.vy / 2 + point_cloud_range[1]
self.z_offset = self.vz / 2 + point_cloud_range[2]
self.point_cloud_range = point_cloud_range
@force_fp32(out_fp16=True)
......@@ -97,7 +98,6 @@ class PillarFeatureNet(nn.Module):
(N, M, C).
num_points (torch.Tensor): Number of points in each pillar.
coors (torch.Tensor): Coordinates of each voxel.
Returns:
torch.Tensor: Features of pillars.
"""
......@@ -114,21 +114,27 @@ class PillarFeatureNet(nn.Module):
dtype = features.dtype
if self._with_voxel_center:
if not self.legacy:
f_center = torch.zeros_like(features[:, :, :2])
f_center = torch.zeros_like(features[:, :, :3])
f_center[:, :, 0] = features[:, :, 0] - (
coors[:, 3].to(dtype).unsqueeze(1) * self.vx +
self.x_offset)
f_center[:, :, 1] = features[:, :, 1] - (
coors[:, 2].to(dtype).unsqueeze(1) * self.vy +
self.y_offset)
f_center[:, :, 2] = features[:, :, 2] - (
coors[:, 1].to(dtype).unsqueeze(1) * self.vz +
self.z_offset)
else:
f_center = features[:, :, :2]
f_center = features[:, :, :3]
f_center[:, :, 0] = f_center[:, :, 0] - (
coors[:, 3].type_as(features).unsqueeze(1) * self.vx +
self.x_offset)
f_center[:, :, 1] = f_center[:, :, 1] - (
coors[:, 2].type_as(features).unsqueeze(1) * self.vy +
self.y_offset)
f_center[:, :, 2] = f_center[:, :, 2] - (
coors[:, 1].type_as(features).unsqueeze(1) * self.vz +
self.z_offset)
features_ls.append(f_center)
if self._with_distance:
......@@ -177,6 +183,8 @@ class DynamicPillarFeatureNet(PillarFeatureNet):
Defaults to dict(type='BN1d', eps=1e-3, momentum=0.01).
mode (str, optional): The mode to gather point features. Options are
'max' or 'avg'. Defaults to 'max'.
legacy (bool, optional): Whether to use the new behavior or
the original behavior. Defaults to True.
"""
def __init__(self,
......@@ -188,7 +196,8 @@ class DynamicPillarFeatureNet(PillarFeatureNet):
voxel_size=(0.2, 0.2, 4),
point_cloud_range=(0, -40, -3, 70.4, 40, 1),
norm_cfg=dict(type='BN1d', eps=1e-3, momentum=0.01),
mode='max'):
mode='max',
legacy=True):
super(DynamicPillarFeatureNet, self).__init__(
in_channels,
feat_channels,
......@@ -198,7 +207,8 @@ class DynamicPillarFeatureNet(PillarFeatureNet):
voxel_size=voxel_size,
point_cloud_range=point_cloud_range,
norm_cfg=norm_cfg,
mode=mode)
mode=mode,
legacy=legacy)
self.fp16_enabled = False
feat_channels = [self.in_channels] + list(feat_channels)
pfn_layers = []
......@@ -233,7 +243,7 @@ class DynamicPillarFeatureNet(PillarFeatureNet):
Returns:
torch.Tensor: Corresponding voxel centers of each points, shape
(M, C), where M is the numver of points.
(M, C), where M is the number of points.
"""
# Step 1: scatter voxel into canvas
# Calculate necessary things for canvas creation
......
......@@ -113,11 +113,12 @@ class PFNLayer(nn.Module):
Args:
in_channels (int): Number of input channels.
out_channels (int): Number of output channels.
norm_cfg (dict): Config dict of normalization layers
last_layer (bool): If last_layer, there is no concatenation of
features.
mode (str): Pooling model to gather features inside voxels.
Default to 'max'.
norm_cfg (dict, optional): Config dict of normalization layers.
Defaults to dict(type='BN1d', eps=1e-3, momentum=0.01).
last_layer (bool, optional): If last_layer, there is no
concatenation of features. Defaults to False.
mode (str, optional): Pooling mode to gather features inside voxels.
Defaults to 'max'.
"""
def __init__(self,
......
......@@ -17,7 +17,7 @@ class HardSimpleVFE(nn.Module):
It simply averages the values of points in a voxel.
Args:
num_features (int): Number of features to use. Default: 4.
num_features (int, optional): Number of features to use. Default: 4.
"""
def __init__(self, num_features=4):
......@@ -93,25 +93,27 @@ class DynamicVFE(nn.Module):
The number of points inside the voxel varies.
Args:
in_channels (int): Input channels of VFE. Defaults to 4.
feat_channels (list(int)): Channels of features in VFE.
with_distance (bool): Whether to use the L2 distance of points to the
origin point. Default False.
with_cluster_center (bool): Whether to use the distance to cluster
center of points inside a voxel. Default to False.
with_voxel_center (bool): Whether to use the distance to center of
voxel for each points inside a voxel. Default to False.
voxel_size (tuple[float]): Size of a single voxel. Default to
(0.2, 0.2, 4).
point_cloud_range (tuple[float]): The range of points or voxels.
Default to (0, -40, -3, 70.4, 40, 1).
norm_cfg (dict): Config dict of normalization layers.
mode (str): The mode when pooling features of points inside a voxel.
Available options include 'max' and 'avg'. Default to 'max'.
fusion_layer (dict | None): The config dict of fusion layer used in
multi-modal detectors. Default to None.
return_point_feats (bool): Whether to return the features of each
points. Default to False.
in_channels (int, optional): Input channels of VFE. Defaults to 4.
feat_channels (list(int), optional): Channels of features in VFE.
with_distance (bool, optional): Whether to use the L2 distance of
points to the origin point. Defaults to False.
with_cluster_center (bool, optional): Whether to use the distance
to cluster center of points inside a voxel. Defaults to False.
with_voxel_center (bool, optional): Whether to use the distance
to the center of the voxel for each point inside a voxel.
Defaults to False.
voxel_size (tuple[float], optional): Size of a single voxel.
Defaults to (0.2, 0.2, 4).
point_cloud_range (tuple[float], optional): The range of points
or voxels. Defaults to (0, -40, -3, 70.4, 40, 1).
norm_cfg (dict, optional): Config dict of normalization layers.
mode (str, optional): The mode when pooling features of points
inside a voxel. Available options include 'max' and 'avg'.
Defaults to 'max'.
fusion_layer (dict, optional): The config dict of fusion
layer used in multi-modal detectors. Defaults to None.
return_point_feats (bool, optional): Whether to return the features
of each point. Defaults to False.
"""
def __init__(self,
......@@ -230,7 +232,7 @@ class DynamicVFE(nn.Module):
coors (torch.Tensor): Coordinates of voxels, shape is Nx(1+NDim).
points (list[torch.Tensor], optional): Raw points used to guide the
multi-modality fusion. Defaults to None.
img_feats (list[torch.Tensor], optional): Image fetures used for
img_feats (list[torch.Tensor], optional): Image features used for
multi-modality fusion. Defaults to None.
img_metas (dict, optional): Meta information of each sample. Defaults to None.
......@@ -292,25 +294,26 @@ class HardVFE(nn.Module):
image feature into voxel features in a point-wise manner.
Args:
in_channels (int): Input channels of VFE. Defaults to 4.
feat_channels (list(int)): Channels of features in VFE.
with_distance (bool): Whether to use the L2 distance of points to the
origin point. Default False.
with_cluster_center (bool): Whether to use the distance to cluster
center of points inside a voxel. Default to False.
with_voxel_center (bool): Whether to use the distance to center of
voxel for each points inside a voxel. Default to False.
voxel_size (tuple[float]): Size of a single voxel. Default to
(0.2, 0.2, 4).
point_cloud_range (tuple[float]): The range of points or voxels.
Default to (0, -40, -3, 70.4, 40, 1).
norm_cfg (dict): Config dict of normalization layers.
mode (str): The mode when pooling features of points inside a voxel.
Available options include 'max' and 'avg'. Default to 'max'.
fusion_layer (dict | None): The config dict of fusion layer used in
multi-modal detectors. Default to None.
return_point_feats (bool): Whether to return the features of each
points. Default to False.
in_channels (int, optional): Input channels of VFE. Defaults to 4.
feat_channels (list(int), optional): Channels of features in VFE.
with_distance (bool, optional): Whether to use the L2 distance
of points to the origin point. Defaults to False.
with_cluster_center (bool, optional): Whether to use the distance
to cluster center of points inside a voxel. Defaults to False.
with_voxel_center (bool, optional): Whether to use the distance to
the center of the voxel for each point inside a voxel. Defaults to False.
voxel_size (tuple[float], optional): Size of a single voxel.
Defaults to (0.2, 0.2, 4).
point_cloud_range (tuple[float], optional): The range of points
or voxels. Defaults to (0, -40, -3, 70.4, 40, 1).
norm_cfg (dict, optional): Config dict of normalization layers.
mode (str, optional): The mode when pooling features of points inside a
voxel. Available options include 'max' and 'avg'.
Defaults to 'max'.
fusion_layer (dict, optional): The config dict of fusion layer
used in multi-modal detectors. Defaults to None.
return_point_feats (bool, optional): Whether to return the
features of each point. Defaults to False.
"""
def __init__(self,
......@@ -394,7 +397,7 @@ class HardVFE(nn.Module):
features (torch.Tensor): Features of voxels, shape is MxNxC.
num_points (torch.Tensor): Number of points in each voxel.
coors (torch.Tensor): Coordinates of voxels, shape is Mx(1+NDim).
img_feats (list[torch.Tensor], optional): Image fetures used for
img_feats (list[torch.Tensor], optional): Image features used for
multi-modality fusion. Defaults to None.
img_metas (dict, optional): Meta information of each sample. Defaults to None.
......
......@@ -4,6 +4,7 @@ from mmcv.ops import (RoIAlign, SigmoidFocalLoss, get_compiler_version,
sigmoid_focal_loss)
from .ball_query import ball_query
from .dgcnn_modules import DGCNNFAModule, DGCNNFPModule, DGCNNGFModule
from .furthest_point_sample import (Points_Sampler, furthest_point_sample,
furthest_point_sample_with_dist)
from .gather_points import gather_points
......@@ -17,8 +18,9 @@ from .pointnet_modules import (PAConvCUDASAModule, PAConvCUDASAModuleMSG,
PAConvSAModule, PAConvSAModuleMSG,
PointFPModule, PointSAModule, PointSAModuleMSG,
build_sa_module)
from .roiaware_pool3d import (RoIAwarePool3d, points_in_boxes_batch,
points_in_boxes_cpu, points_in_boxes_gpu)
from .roiaware_pool3d import (RoIAwarePool3d, points_in_boxes_all,
points_in_boxes_cpu, points_in_boxes_part)
from .roipoint_pool3d import RoIPointPool3d
from .sparse_block import (SparseBasicBlock, SparseBottleneck,
make_sparse_convmodule)
from .voxel import DynamicScatter, Voxelization, dynamic_scatter, voxelization
......@@ -29,13 +31,14 @@ __all__ = [
'NaiveSyncBatchNorm2d', 'batched_nms', 'Voxelization', 'voxelization',
'dynamic_scatter', 'DynamicScatter', 'sigmoid_focal_loss',
'SigmoidFocalLoss', 'SparseBasicBlock', 'SparseBottleneck',
'RoIAwarePool3d', 'points_in_boxes_gpu', 'points_in_boxes_cpu',
'RoIAwarePool3d', 'points_in_boxes_part', 'points_in_boxes_cpu',
'make_sparse_convmodule', 'ball_query', 'knn', 'furthest_point_sample',
'furthest_point_sample_with_dist', 'three_interpolate', 'three_nn',
'gather_points', 'grouping_operation', 'group_points', 'GroupAll',
'QueryAndGroup', 'PointSAModule', 'PointSAModuleMSG', 'PointFPModule',
'points_in_boxes_batch', 'get_compiler_version', 'assign_score_withk',
'get_compiling_cuda_version', 'Points_Sampler', 'build_sa_module',
'PAConv', 'PAConvCUDA', 'PAConvSAModuleMSG', 'PAConvSAModule',
'PAConvCUDASAModule', 'PAConvCUDASAModuleMSG'
'DGCNNFPModule', 'DGCNNGFModule', 'DGCNNFAModule', 'points_in_boxes_all',
'get_compiler_version', 'assign_score_withk', 'get_compiling_cuda_version',
'Points_Sampler', 'build_sa_module', 'PAConv', 'PAConvCUDA',
'PAConvSAModuleMSG', 'PAConvSAModule', 'PAConvCUDASAModule',
'PAConvCUDASAModuleMSG', 'RoIPointPool3d'
]
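The `mmdet3d.ops` exports above reflect the same rename at the functional level: `points_in_boxes_gpu` becomes `points_in_boxes_part` and `points_in_boxes_batch` becomes `points_in_boxes_all`, while `points_in_boxes_cpu` keeps its name. A hedged migration sketch for downstream code, assuming the argument layout matches the box-structure wrappers and a CUDA device is available:

import torch
from mmdet3d.ops import points_in_boxes_all, points_in_boxes_part

points = torch.rand(1, 100, 3).cuda()          # (B, M, 3)
boxes = torch.rand(1, 5, 7).cuda()             # (B, T, 7); these are CUDA ops
idx = points_in_boxes_part(points, boxes)      # was points_in_boxes_gpu, (B, M)
mask = points_in_boxes_all(points, boxes)      # was points_in_boxes_batch, (B, M, T)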
# Copyright (c) OpenMMLab. All rights reserved.
from .ball_query import ball_query
__all__ = ['ball_query']
# Copyright (c) OpenMMLab. All rights reserved.
import torch
from torch.autograd import Function
......@@ -23,7 +24,7 @@ class BallQuery(Function):
center_xyz (Tensor): (B, npoint, 3) centers of the ball query.
Returns:
Tensor: (B, npoint, nsample) tensor with the indicies of
Tensor: (B, npoint, nsample) tensor with the indices of
the features that form the query balls.
"""
assert center_xyz.is_contiguous()
......