Unverified Commit b07fb946 authored by Ziyi Wu, committed by GitHub

[Feature] Support PAConv modules (#599)

* refactor PN SA module code

* refactor PAConv op & add PAConvSAModule

* add unit test

* add PAConvCUDA SA module

* add unit test

* minor fix

* merge master

* move paconv sa modules to a new file

* rename to BasePointSAModule

* rename to PAConvCUDASAModule

* delete typing comment

* refine docs

* rename mlp_spec to mlp_channel

* fix typos in comment
parent c1748ef5
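
For context, a minimal usage sketch of the new modules (adapted from the unit tests added in this commit, assuming a CUDA device and random input points): the single-scale PAConvSAModule is built through the SA_MODULES registry just like the existing PointNet++ SA modules.

import torch
from mmdet3d.ops import build_sa_module

sa_cfg = dict(
    type='PAConvSAModule',
    num_point=16,
    radius=0.2,
    num_sample=8,
    mlp_channels=[12, 32],
    paconv_num_kernels=[8],
    norm_cfg=dict(type='BN2d'),
    use_xyz=True,
    pool_mod='max',
    paconv_kernel_input='w_neighbor')
sa_module = build_sa_module(sa_cfg).cuda()

xyz = torch.rand(1, 100, 3).cuda()                             # (B, N, 3)
features = xyz.repeat([1, 1, 4]).transpose(1, 2).contiguous()  # (B, 12, N)
new_xyz, new_features, inds = sa_module(xyz, features)
# expected shapes: new_xyz (1, 16, 3), new_features (1, 32, 16), inds (1, 16)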
......@@ -11,7 +11,7 @@ from ..builder import MIDDLE_ENCODERS
class SparseUNet(nn.Module):
r"""SparseUNet for PartA^2.
See the `paper <https://arxiv.org/abs/1907.03670>`_ for more detials.
See the `paper <https://arxiv.org/abs/1907.03670>`_ for more details.
Args:
in_channels (int): The number of input channels.
......
......@@ -13,7 +13,7 @@ class PointwiseSemanticHead(nn.Module):
"""Semantic segmentation head for point-wise segmentation.
Predict point-wise segmentation and part regression results for PartA2.
See `paper <https://arxiv.org/abs/1907.03670>`_ for more detials.
See `paper <https://arxiv.org/abs/1907.03670>`_ for more details.
Args:
in_channels (int): The number of input channel.
......
......@@ -12,7 +12,9 @@ from .interpolate import three_interpolate, three_nn
from .knn import knn
from .norm import NaiveSyncBatchNorm1d, NaiveSyncBatchNorm2d
from .paconv import PAConv, PAConvCUDA, assign_score_withk
from .pointnet_modules import (PointFPModule, PointSAModule, PointSAModuleMSG,
from .pointnet_modules import (PAConvCUDASAModule, PAConvCUDASAModuleMSG,
PAConvSAModule, PAConvSAModuleMSG,
PointFPModule, PointSAModule, PointSAModuleMSG,
build_sa_module)
from .roiaware_pool3d import (RoIAwarePool3d, points_in_boxes_batch,
points_in_boxes_cpu, points_in_boxes_gpu)
......@@ -33,5 +35,6 @@ __all__ = [
'QueryAndGroup', 'PointSAModule', 'PointSAModuleMSG', 'PointFPModule',
'points_in_boxes_batch', 'get_compiler_version', 'assign_score_withk',
'get_compiling_cuda_version', 'Points_Sampler', 'build_sa_module',
'PAConv', 'PAConvCUDA'
'PAConv', 'PAConvCUDA', 'PAConvSAModuleMSG', 'PAConvSAModule',
'PAConvCUDASAModule', 'PAConvCUDASAModuleMSG'
]
......@@ -29,6 +29,8 @@ class QueryAndGroup(nn.Module):
return_unique_cnt (bool): Whether to return the count of
unique samples.
Default: False.
return_grouped_idx (bool): Whether to return grouped idx.
Default: False.
"""
def __init__(self,
......@@ -39,7 +41,8 @@ class QueryAndGroup(nn.Module):
return_grouped_xyz=False,
normalize_xyz=False,
uniform_sample=False,
return_unique_cnt=False):
return_unique_cnt=False,
return_grouped_idx=False):
super(QueryAndGroup, self).__init__()
self.max_radius = max_radius
self.min_radius = min_radius
......@@ -49,6 +52,7 @@ class QueryAndGroup(nn.Module):
self.normalize_xyz = normalize_xyz
self.uniform_sample = uniform_sample
self.return_unique_cnt = return_unique_cnt
self.return_grouped_idx = return_grouped_idx
if self.return_unique_cnt:
assert self.uniform_sample, \
'uniform_sample should be True when ' \
......@@ -116,6 +120,8 @@ class QueryAndGroup(nn.Module):
ret.append(grouped_xyz)
if self.return_unique_cnt:
ret.append(unique_cnt)
if self.return_grouped_idx:
ret.append(idx)
if len(ret) == 1:
return ret[0]
else:
......
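
A minimal sketch of the new `return_grouped_idx` flag (CUDA device assumed; the leading positional arguments, ball-query radius and sample count, are assumed from the surrounding code rather than spelled out in this hunk):

import torch
from mmdet3d.ops import QueryAndGroup

# radius and sample count are assumed to be the leading positional arguments
grouper = QueryAndGroup(
    0.2, 8,
    use_xyz=False,
    return_grouped_xyz=True,
    return_grouped_idx=True)

points_xyz = torch.rand(2, 128, 3).cuda()      # (B, N, 3) all points
new_xyz = points_xyz[:, :16, :].contiguous()   # (B, npoint, 3) query centers
features = torch.rand(2, 12, 128).cuda()       # (B, C, N)

grouped_features, grouped_xyz, grouped_idx = grouper(points_xyz, new_xyz, features)
# grouped_features: (B, C, npoint, 8), grouped_xyz: (B, 3, npoint, 8)
# grouped_idx: (B, npoint, 8), indices into the N input points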
......@@ -244,18 +244,24 @@ class PAConv(nn.Module):
dim=1)
return xyz_features
def forward(self, points_xyz, features):
def forward(self, inputs):
"""Forward.
Args:
points_xyz (torch.Tensor): (B, 3, npoint, K)
Coordinates of the grouped points.
features (torch.Tensor): (B, in_c, npoint, K)
Features of the queried points.
inputs (tuple(torch.Tensor)):
- features (torch.Tensor): (B, in_c, npoint, K)
Features of the queried points.
- points_xyz (torch.Tensor): (B, 3, npoint, K)
Coordinates of the grouped points.
Returns:
torch.Tensor: (B, out_c, npoint, K), features after PAConv.
Tuple[torch.Tensor]:
- new_features: (B, out_c, npoint, K), features after PAConv.
- points_xyz: same as input.
"""
features, points_xyz = inputs
B, _, npoint, K = features.size()
if self.kernel_input == 'w_neighbor':
......@@ -286,7 +292,9 @@ class PAConv(nn.Module):
if self.activate is not None:
new_features = self.activate(new_features)
return new_features
# in order to keep input-output consistency
# so that we can wrap PAConv in Sequential
return (new_features, points_xyz)
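
The tuple interface is what allows stacking PAConv layers with `nn.Sequential`, as `PAConvSAModuleMSG` below does. A minimal sketch (shapes and layer widths are illustrative; constructor arguments follow the unit tests in this commit):

import torch
from torch import nn
from mmdet3d.ops import PAConv

B, in_c, npoint, K = 2, 12, 16, 8
grouped_features = torch.rand(B, in_c, npoint, K)  # (B, in_c, npoint, K)
grouped_xyz = torch.rand(B, 3, npoint, K)          # (B, 3, npoint, K)

# each layer takes and returns a (features, xyz) tuple, so chaining works
mlp = nn.Sequential(
    PAConv(in_c, 32, 4),   # 4 weight kernels, as in the tests
    PAConv(32, 64, 4))

with torch.no_grad():
    new_features, _ = mlp((grouped_features, grouped_xyz))
# new_features: (B, 64, npoint, K); grouped_xyz passes through unchanged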
class PAConvCUDA(PAConv):
......@@ -328,22 +336,30 @@ class PAConvCUDA(PAConv):
assert self.kernel_input == 'w_neighbor', \
'CUDA implemented PAConv only supports w_neighbor kernel_input'
def forward(self, points_xyz, features, points_idx):
def forward(self, inputs):
"""Forward.
Args:
points_xyz (torch.Tensor): (B, 3, npoint, K)
Coordinates of the grouped points.
features (torch.Tensor): (B, in_c, N)
Features of all points in the current point cloud.
Different from `features` in non-CUDA version PAConv, here the
features are not grouped by each center to form a K dim.
points_idx (torch.Tensor): (B, npoint, K)
Index of the grouped points.
inputs (tuple(torch.Tensor)):
- features (torch.Tensor): (B, in_c, N)
Features of all points in the current point cloud.
Different from non-CUDA version PAConv, here the features
are not grouped by each center to form a K dim.
- points_xyz (torch.Tensor): (B, 3, npoint, K)
Coordinates of the grouped points.
- points_idx (torch.Tensor): (B, npoint, K)
Index of the grouped points.
Returns:
torch.Tensor: (B, out_c, npoint, K), features after PAConv.
Tuple[torch.Tensor]:
- new_features: (B, out_c, npoint, K), features after PAConv.
- points_xyz: same as input.
- points_idx: same as input.
"""
features, points_xyz, points_idx = inputs
# prepare features describing each point's relation to its grouping center
xyz_features = self._prepare_scorenet_input(points_xyz)
......@@ -365,4 +381,5 @@ class PAConvCUDA(PAConv):
if self.activate is not None:
new_features = self.activate(new_features)
return new_features
# in order to keep input-output consistency
return (new_features, points_xyz, points_idx)
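
A corresponding sketch of the CUDA variant's interface (CUDA device required; shapes are illustrative). Unlike `PAConv`, `features` here covers all N points and `points_idx` picks each center's K neighbors:

import torch
from mmdet3d.ops import PAConvCUDA

B, in_c, N, npoint, K = 2, 12, 128, 16, 8
features = torch.rand(B, in_c, N).cuda()                 # all-point features
points_xyz = torch.rand(B, 3, npoint, K).cuda()          # grouped coordinates
points_idx = torch.randint(0, N, (B, npoint, K)).cuda()  # grouped indices

paconv = PAConvCUDA(in_c, 32, 4).cuda()   # 4 weight kernels, as in the tests
with torch.no_grad():
    new_features, _, _ = paconv((features, points_xyz, points_idx))
# new_features: (B, 32, npoint, K)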
......@@ -57,9 +57,10 @@ def assign_kernel_withoutk(features, kernels, M):
M (int): Number of weight matrices in the weight bank.
Returns:
Tuple[torch.Tensor]: both of shape (B, N, M, out_dim)
point_features: Pre-computed features for points.
center_features: Pre-computed features for centers.
Tuple[torch.Tensor]: both of shape (B, N, M, out_dim):
- point_features: Pre-computed features for points.
- center_features: Pre-computed features for centers.
"""
B, in_dim, N = features.size()
feat_trans = features.permute(0, 2, 1) # [B, N, in_dim]
......
from .builder import build_sa_module
from .paconv_sa_module import (PAConvCUDASAModule, PAConvCUDASAModuleMSG,
PAConvSAModule, PAConvSAModuleMSG)
from .point_fp_module import PointFPModule
from .point_sa_module import PointSAModule, PointSAModuleMSG
__all__ = [
'build_sa_module', 'PointSAModuleMSG', 'PointSAModule', 'PointFPModule'
'build_sa_module', 'PointSAModuleMSG', 'PointSAModule', 'PointFPModule',
'PAConvSAModule', 'PAConvSAModuleMSG', 'PAConvCUDASAModule',
'PAConvCUDASAModuleMSG'
]
import torch
from torch import nn as nn
from mmdet3d.ops import PAConv, PAConvCUDA
from .builder import SA_MODULES
from .point_sa_module import BasePointSAModule
@SA_MODULES.register_module()
class PAConvSAModuleMSG(BasePointSAModule):
r"""Point set abstraction module with multi-scale grouping (MSG) used in
PAConv networks.
Replace the MLPs in `PointSAModuleMSG` with PAConv layers.
See the `paper <https://arxiv.org/abs/2103.14635>`_ for more details.
Args:
paconv_num_kernels (list[list[int]]): Number of weight kernels in the
weight banks of each layer's PAConv.
paconv_kernel_input (str, optional): Input features to be multiplied
with weight kernels. Can be 'identity' or 'w_neighbor'.
Defaults to 'w_neighbor'.
scorenet_input (str, optional): Type of the input to ScoreNet.
Defaults to 'w_neighbor_dist'. Can be the following values:
- 'identity': Use xyz coordinates as input.
- 'w_neighbor': Use xyz coordinates and the difference with center
points as input.
- 'w_neighbor_dist': Use xyz coordinates, the difference with
center points and the Euclidean distance as input.
scorenet_cfg (dict, optional): Config of the ScoreNet module, which
may contain the following keys and values:
- mlp_channels (List[int]): Hidden units of MLPs.
- score_norm (str): Normalization function of output scores.
Can be 'softmax', 'sigmoid' or 'identity'.
- temp_factor (float): Temperature factor to scale the output
scores before softmax.
- last_bn (bool): Whether to use BN on the last output of mlps.
"""
def __init__(self,
num_point,
radii,
sample_nums,
mlp_channels,
paconv_num_kernels,
fps_mod=['D-FPS'],
fps_sample_range_list=[-1],
dilated_group=False,
norm_cfg=dict(type='BN2d', momentum=0.1),
use_xyz=True,
pool_mod='max',
normalize_xyz=False,
bias='auto',
paconv_kernel_input='w_neighbor',
scorenet_input='w_neighbor_dist',
scorenet_cfg=dict(
mlp_channels=[16, 16, 16],
score_norm='softmax',
temp_factor=1.0,
last_bn=False)):
super(PAConvSAModuleMSG, self).__init__(
num_point=num_point,
radii=radii,
sample_nums=sample_nums,
mlp_channels=mlp_channels,
fps_mod=fps_mod,
fps_sample_range_list=fps_sample_range_list,
dilated_group=dilated_group,
use_xyz=use_xyz,
pool_mod=pool_mod,
normalize_xyz=normalize_xyz,
grouper_return_grouped_xyz=True)
assert len(paconv_num_kernels) == len(mlp_channels)
for i in range(len(mlp_channels)):
assert len(paconv_num_kernels[i]) == len(mlp_channels[i]) - 1, \
'PAConv number of weight kernels wrong'
# in PAConv, bias only exists in ScoreNet
scorenet_cfg['bias'] = bias
for i in range(len(self.mlp_channels)):
mlp_channel = self.mlp_channels[i]
if use_xyz:
mlp_channel[0] += 3
num_kernels = paconv_num_kernels[i]
mlp = nn.Sequential()
for i in range(len(mlp_channel) - 1):
mlp.add_module(
f'layer{i}',
PAConv(
mlp_channel[i],
mlp_channel[i + 1],
num_kernels[i],
norm_cfg=norm_cfg,
kernel_input=paconv_kernel_input,
scorenet_input=scorenet_input,
scorenet_cfg=scorenet_cfg))
self.mlps.append(mlp)
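
A usage sketch adapted from the unit tests added in this commit (CUDA device assumed): two grouping scales, one PAConv layer per scale, with the weight-bank size of each layer set by `paconv_num_kernels`.

import torch
from mmdet3d.ops import PAConvSAModuleMSG

sa_module = PAConvSAModuleMSG(
    num_point=16,
    radii=[0.2, 0.4],
    sample_nums=[4, 8],
    mlp_channels=[[12, 16], [12, 32]],
    paconv_num_kernels=[[4], [8]],
    use_xyz=False,
    pool_mod='max',
    paconv_kernel_input='w_neighbor').cuda()

xyz = torch.rand(1, 100, 3).cuda()                             # (B, N, 3)
features = xyz.repeat([1, 1, 4]).transpose(1, 2).contiguous()  # (B, 12, N)
new_xyz, new_features, inds = sa_module(xyz, features)
# per-scale outputs (16 and 32 channels) are concatenated:
# new_xyz: (1, 16, 3), new_features: (1, 48, 16), inds: (1, 16)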
@SA_MODULES.register_module()
class PAConvSAModule(PAConvSAModuleMSG):
r"""Point set abstraction module with single-scale grouping (SSG) used in
PAConv networks.
Replace the MLPs in `PointSAModule` with PAConv layers. See the `paper
<https://arxiv.org/abs/2103.14635>`_ for more details.
"""
def __init__(self,
mlp_channels,
paconv_num_kernels,
num_point=None,
radius=None,
num_sample=None,
norm_cfg=dict(type='BN2d', momentum=0.1),
use_xyz=True,
pool_mod='max',
fps_mod=['D-FPS'],
fps_sample_range_list=[-1],
normalize_xyz=False,
paconv_kernel_input='w_neighbor',
scorenet_input='w_neighbor_dist',
scorenet_cfg=dict(
mlp_channels=[16, 16, 16],
score_norm='softmax',
temp_factor=1.0,
last_bn=False)):
super(PAConvSAModule, self).__init__(
mlp_channels=[mlp_channels],
paconv_num_kernels=[paconv_num_kernels],
num_point=num_point,
radii=[radius],
sample_nums=[num_sample],
norm_cfg=norm_cfg,
use_xyz=use_xyz,
pool_mod=pool_mod,
fps_mod=fps_mod,
fps_sample_range_list=fps_sample_range_list,
normalize_xyz=normalize_xyz,
paconv_kernel_input=paconv_kernel_input,
scorenet_input=scorenet_input,
scorenet_cfg=scorenet_cfg)
@SA_MODULES.register_module()
class PAConvCUDASAModuleMSG(BasePointSAModule):
r"""Point set abstraction module with multi-scale grouping (MSG) used in
PAConv networks.
Replace the non-CUDA version of PAConv with the CUDA-implemented PAConv for
efficient computation. See the `paper <https://arxiv.org/abs/2103.14635>`_
for more details.
"""
def __init__(self,
num_point,
radii,
sample_nums,
mlp_channels,
paconv_num_kernels,
fps_mod=['D-FPS'],
fps_sample_range_list=[-1],
dilated_group=False,
norm_cfg=dict(type='BN2d', momentum=0.1),
use_xyz=True,
pool_mod='max',
normalize_xyz=False,
bias='auto',
paconv_kernel_input='w_neighbor',
scorenet_input='w_neighbor_dist',
scorenet_cfg=dict(
mlp_channels=[8, 16, 16],
score_norm='softmax',
temp_factor=1.0,
last_bn=False)):
super(PAConvCUDASAModuleMSG, self).__init__(
num_point=num_point,
radii=radii,
sample_nums=sample_nums,
mlp_channels=mlp_channels,
fps_mod=fps_mod,
fps_sample_range_list=fps_sample_range_list,
dilated_group=dilated_group,
use_xyz=use_xyz,
pool_mod=pool_mod,
normalize_xyz=normalize_xyz,
grouper_return_grouped_xyz=True,
grouper_return_grouped_idx=True)
assert len(paconv_num_kernels) == len(mlp_channels)
for i in range(len(mlp_channels)):
assert len(paconv_num_kernels[i]) == len(mlp_channels[i]) - 1, \
'PAConv number of weight kernels wrong'
# in PAConv, bias only exists in ScoreNet
scorenet_cfg['bias'] = bias
# we need to manually concat xyz for CUDA implemented PAConv
self.use_xyz = use_xyz
for i in range(len(self.mlp_channels)):
mlp_channel = self.mlp_channels[i]
if use_xyz:
mlp_channel[0] += 3
num_kernels = paconv_num_kernels[i]
# can't use `nn.Sequential` for PAConvCUDA because its input and
# output have different shapes
mlp = nn.ModuleList()
for i in range(len(mlp_channel) - 1):
mlp.append(
PAConvCUDA(
mlp_channel[i],
mlp_channel[i + 1],
num_kernels[i],
norm_cfg=norm_cfg,
kernel_input=paconv_kernel_input,
scorenet_input=scorenet_input,
scorenet_cfg=scorenet_cfg))
self.mlps.append(mlp)
def forward(
self,
points_xyz,
features=None,
indices=None,
target_xyz=None,
):
"""forward.
Args:
points_xyz (Tensor): (B, N, 3) xyz coordinates of the features.
features (Tensor): (B, C, N) features of each point.
Default: None.
indices (Tensor): (B, num_point) Index of the features.
Default: None.
target_xyz (Tensor): (B, M, 3) new_xyz coordinates of the outputs.
Returns:
Tensor: (B, M, 3) where M is the number of points.
New features xyz.
Tensor: (B, M, sum_k(mlps[k][-1])) where M is the number
of points. New feature descriptors.
Tensor: (B, M) where M is the number of points.
Index of the features.
"""
new_features_list = []
# sample points, (B, num_point, 3), (B, num_point)
new_xyz, indices = self._sample_points(points_xyz, features, indices,
target_xyz)
for i in range(len(self.groupers)):
xyz = points_xyz
new_features = features
for j in range(len(self.mlps[i])):
# we don't use grouped_features here to avoid large GPU memory usage
# _, (B, 3, num_point, nsample), (B, num_point, nsample)
_, grouped_xyz, grouped_idx = self.groupers[i](xyz, new_xyz,
new_features)
# concat xyz as additional features
if self.use_xyz and j == 0:
# (B, C+3, N)
new_features = torch.cat(
(points_xyz.permute(0, 2, 1), new_features), dim=1)
# (B, out_c, num_point, nsample)
grouped_new_features = self.mlps[i][j](
(new_features, grouped_xyz, grouped_idx.long()))[0]
# different from PointNet++ and the non-CUDA version of PAConv,
# the CUDA version of PAConv needs to aggregate local features
# after every Conv layer in order to recover a valid input shape
# (B, out_c, num_point)
new_features = self._pool_features(grouped_new_features)
# constrain the points to be grouped for next PAConv layer
# because new_features only contains sampled centers now
# (B, num_point, 3)
xyz = new_xyz
new_features_list.append(new_features)
return new_xyz, torch.cat(new_features_list, dim=1), indices
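
Usage mirrors `PAConvSAModuleMSG`; a sketch adapted from the tests below (CUDA device required). Internally each `PAConvCUDA` layer is followed by pooling and re-grouping, which is why the layers live in an `nn.ModuleList` rather than `nn.Sequential`.

import torch
from mmdet3d.ops import PAConvCUDASAModuleMSG

sa_module = PAConvCUDASAModuleMSG(
    num_point=16,
    radii=[0.2, 0.4],
    sample_nums=[4, 8],
    mlp_channels=[[12, 16], [12, 32]],
    paconv_num_kernels=[[4], [8]],
    use_xyz=False,
    pool_mod='max',
    paconv_kernel_input='w_neighbor').cuda()

xyz = torch.rand(1, 100, 3).cuda()
features = xyz.repeat([1, 1, 4]).transpose(1, 2).contiguous()
new_xyz, new_features, inds = sa_module(xyz, features)
# new_xyz: (1, 16, 3), new_features: (1, 48, 16), inds: (1, 16)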
@SA_MODULES.register_module()
class PAConvCUDASAModule(PAConvCUDASAModuleMSG):
r"""Point set abstraction module with single-scale grouping (SSG) used in
PAConv networks.
Replace the non-CUDA version of PAConv with the CUDA-implemented PAConv for
efficient computation. See the `paper <https://arxiv.org/abs/2103.14635>`_
for more details.
"""
def __init__(self,
mlp_channels,
paconv_num_kernels,
num_point=None,
radius=None,
num_sample=None,
norm_cfg=dict(type='BN2d', momentum=0.1),
use_xyz=True,
pool_mod='max',
fps_mod=['D-FPS'],
fps_sample_range_list=[-1],
normalize_xyz=False,
paconv_kernel_input='w_neighbor',
scorenet_input='w_neighbor_dist',
scorenet_cfg=dict(
mlp_channels=[8, 16, 16],
score_norm='softmax',
temp_factor=1.0,
last_bn=False)):
super(PAConvCUDASAModule, self).__init__(
mlp_channels=[mlp_channels],
paconv_num_kernels=[paconv_num_kernels],
num_point=num_point,
radii=[radius],
sample_nums=[num_sample],
norm_cfg=norm_cfg,
use_xyz=use_xyz,
pool_mod=pool_mod,
fps_mod=fps_mod,
fps_sample_range_list=fps_sample_range_list,
normalize_xyz=normalize_xyz,
paconv_kernel_input=paconv_kernel_input,
scorenet_input=scorenet_input,
scorenet_cfg=scorenet_cfg)
......@@ -2,22 +2,20 @@ import torch
from mmcv.cnn import ConvModule
from torch import nn as nn
from torch.nn import functional as F
from typing import List
from mmdet3d.ops import GroupAll, Points_Sampler, QueryAndGroup, gather_points
from mmdet3d.ops import (GroupAll, PAConv, Points_Sampler, QueryAndGroup,
gather_points)
from .builder import SA_MODULES
@SA_MODULES.register_module()
class PointSAModuleMSG(nn.Module):
"""Point set abstraction module with multi-scale grouping used in
Pointnets.
class BasePointSAModule(nn.Module):
"""Base module for point set abstraction module used in PointNets.
Args:
num_point (int): Number of points.
radii (list[float]): List of radius in each ball query.
sample_nums (list[int]): Number of samples in each ball query.
mlp_channels (list[int]): Specify of the pointnet before
mlp_channels (list[list[int]]): Specify the pointnet channels before
the global pooling for each scale.
fps_mod (list[str]): Type of FPS method, valid mod
['F-FPS', 'D-FPS', 'FS'], Default: ['D-FPS'].
......@@ -28,33 +26,32 @@ class PointSAModuleMSG(nn.Module):
Default: [-1].
dilated_group (bool): Whether to use dilated ball query.
Default: False.
norm_cfg (dict): Type of normalization method.
Default: dict(type='BN2d').
use_xyz (bool): Whether to use xyz.
Default: True.
pool_mod (str): Type of pooling method.
Default: 'max'.
normalize_xyz (bool): Whether to normalize local XYZ with radius.
Default: False.
bias (bool | str): If specified as `auto`, it will be decided by the
norm_cfg. Bias will be set as True if `norm_cfg` is None, otherwise
False. Default: "auto".
grouper_return_grouped_xyz (bool): Whether to return grouped xyz in
`QueryAndGroup`. Defaults to False.
grouper_return_grouped_idx (bool): Whether to return grouped idx in
`QueryAndGroup`. Defaults to False.
"""
def __init__(self,
num_point: int,
radii: List[float],
sample_nums: List[int],
mlp_channels: List[List[int]],
fps_mod: List[str] = ['D-FPS'],
fps_sample_range_list: List[int] = [-1],
dilated_group: bool = False,
norm_cfg: dict = dict(type='BN2d'),
use_xyz: bool = True,
num_point,
radii,
sample_nums,
mlp_channels,
fps_mod=['D-FPS'],
fps_sample_range_list=[-1],
dilated_group=False,
use_xyz=True,
pool_mod='max',
normalize_xyz: bool = False,
bias='auto'):
super().__init__()
normalize_xyz=False,
grouper_return_grouped_xyz=False,
grouper_return_grouped_idx=False):
super(BasePointSAModule, self).__init__()
assert len(radii) == len(sample_nums) == len(mlp_channels)
assert pool_mod in ['max', 'avg']
......@@ -65,6 +62,7 @@ class PointSAModuleMSG(nn.Module):
if isinstance(mlp_channels, tuple):
mlp_channels = list(map(list, mlp_channels))
self.mlp_channels = mlp_channels
if isinstance(num_point, int):
self.num_point = [num_point]
......@@ -95,37 +93,19 @@ class PointSAModuleMSG(nn.Module):
sample_num,
min_radius=min_radius,
use_xyz=use_xyz,
normalize_xyz=normalize_xyz)
normalize_xyz=normalize_xyz,
return_grouped_xyz=grouper_return_grouped_xyz,
return_grouped_idx=grouper_return_grouped_idx)
else:
grouper = GroupAll(use_xyz)
self.groupers.append(grouper)
mlp_spec = mlp_channels[i]
if use_xyz:
mlp_spec[0] += 3
mlp = nn.Sequential()
for i in range(len(mlp_spec) - 1):
mlp.add_module(
f'layer{i}',
ConvModule(
mlp_spec[i],
mlp_spec[i + 1],
kernel_size=(1, 1),
stride=(1, 1),
conv_cfg=dict(type='Conv2d'),
norm_cfg=norm_cfg,
bias=bias))
self.mlps.append(mlp)
def _sample_points(self, points_xyz, features, indices, target_xyz):
"""Perform point sampling based on inputs.
def forward(
self,
points_xyz: torch.Tensor,
features: torch.Tensor = None,
indices: torch.Tensor = None,
target_xyz: torch.Tensor = None,
) -> (torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor):
"""forward.
If `indices` is specified, directly sample corresponding points.
Else if `target_xyz` is specified, use it as the sampled points.
Otherwise sample points using `self.points_sampler`.
Args:
points_xyz (Tensor): (B, N, 3) xyz coordinates of the features.
......@@ -136,14 +116,9 @@ class PointSAModuleMSG(nn.Module):
target_xyz (Tensor): (B, M, 3) new_xyz coordinates of the outputs.
Returns:
Tensor: (B, M, 3) where M is the number of points.
New features xyz.
Tensor: (B, M, sum_k(mlps[k][-1])) where M is the number
of points. New feature descriptors.
Tensor: (B, M) where M is the number of points.
Index of the features.
Tensor: (B, num_point, 3) sampled xyz coordinates of points.
Tensor: (B, num_point) sampled points' index.
"""
new_features_list = []
xyz_flipped = points_xyz.transpose(1, 2).contiguous()
if indices is not None:
assert (indices.shape[1] == self.num_point[0])
......@@ -156,32 +131,168 @@ class PointSAModuleMSG(nn.Module):
new_xyz = gather_points(xyz_flipped, indices).transpose(
1, 2).contiguous() if self.num_point is not None else None
return new_xyz, indices
def _pool_features(self, features):
"""Perform feature aggregation using pooling operation.
Args:
features (torch.Tensor): (B, C, N, K)
Features of locally grouped points before pooling.
Returns:
torch.Tensor: (B, C, N)
Pooled features aggregating local information.
"""
if self.pool_mod == 'max':
# (B, C, N, 1)
new_features = F.max_pool2d(
features, kernel_size=[1, features.size(3)])
elif self.pool_mod == 'avg':
# (B, C, N, 1)
new_features = F.avg_pool2d(
features, kernel_size=[1, features.size(3)])
else:
raise NotImplementedError
return new_features.squeeze(-1).contiguous()
def forward(
self,
points_xyz,
features=None,
indices=None,
target_xyz=None,
):
"""forward.
Args:
points_xyz (Tensor): (B, N, 3) xyz coordinates of the features.
features (Tensor): (B, C, N) features of each point.
Default: None.
indices (Tensor): (B, num_point) Index of the features.
Default: None.
target_xyz (Tensor): (B, M, 3) new_xyz coordinates of the outputs.
Returns:
Tensor: (B, M, 3) where M is the number of points.
New features xyz.
Tensor: (B, M, sum_k(mlps[k][-1])) where M is the number
of points. New feature descriptors.
Tensor: (B, M) where M is the number of points.
Index of the features.
"""
new_features_list = []
# sample points, (B, num_point, 3), (B, num_point)
new_xyz, indices = self._sample_points(points_xyz, features, indices,
target_xyz)
for i in range(len(self.groupers)):
# (B, C, num_point, nsample)
new_features = self.groupers[i](points_xyz, new_xyz, features)
# grouped_results may contain:
# - grouped_features: (B, C, num_point, nsample)
# - grouped_xyz: (B, 3, num_point, nsample)
# - grouped_idx: (B, num_point, nsample)
grouped_results = self.groupers[i](points_xyz, new_xyz, features)
# (B, mlp[-1], num_point, nsample)
new_features = self.mlps[i](new_features)
if self.pool_mod == 'max':
# (B, mlp[-1], num_point, 1)
new_features = F.max_pool2d(
new_features, kernel_size=[1, new_features.size(3)])
elif self.pool_mod == 'avg':
# (B, mlp[-1], num_point, 1)
new_features = F.avg_pool2d(
new_features, kernel_size=[1, new_features.size(3)])
else:
raise NotImplementedError
new_features = self.mlps[i](grouped_results)
new_features = new_features.squeeze(-1) # (B, mlp[-1], num_point)
# this is a bit hacky because PAConv outputs two values;
# we take the first one as the feature
if isinstance(self.mlps[i][0], PAConv):
assert isinstance(new_features, tuple)
new_features = new_features[0]
# (B, mlp[-1], num_point)
new_features = self._pool_features(new_features)
new_features_list.append(new_features)
return new_xyz, torch.cat(new_features_list, dim=1), indices
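
A sketch of the refactored interface shared by all subclasses (shapes taken from the docstrings, CUDA device assumed): points can be sampled internally by `self.points_sampler`, or pre-computed `indices` can be supplied to skip FPS.

import torch
from mmdet3d.ops import PointSAModuleMSG

sa_module = PointSAModuleMSG(
    num_point=16,
    radii=[0.2, 0.4],
    sample_nums=[4, 8],
    mlp_channels=[[12, 16], [12, 32]],
    use_xyz=False).cuda()

xyz = torch.rand(1, 100, 3).cuda()        # (B, N, 3)
features = torch.rand(1, 12, 100).cuda()  # (B, C, N)

# let the module run FPS itself
new_xyz, new_features, inds = sa_module(xyz, features)
# or reuse previously sampled indices and skip FPS
new_xyz2, new_features2, _ = sa_module(xyz, features, indices=inds)
# new_xyz: (1, 16, 3), new_features: (1, 16 + 32, 16), inds: (1, 16)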
@SA_MODULES.register_module()
class PointSAModuleMSG(BasePointSAModule):
"""Point set abstraction module with multi-scale grouping (MSG) used in
PointNets.
Args:
num_point (int): Number of points.
radii (list[float]): List of radius in each ball query.
sample_nums (list[int]): Number of samples in each ball query.
mlp_channels (list[list[int]]): Specify the pointnet channels before
the global pooling for each scale.
fps_mod (list[str]): Type of FPS method, valid mod
['F-FPS', 'D-FPS', 'FS'], Default: ['D-FPS'].
F-FPS: using feature distances for FPS.
D-FPS: using Euclidean distances of points for FPS.
FS: using F-FPS and D-FPS simultaneously.
fps_sample_range_list (list[int]): Range of points to apply FPS.
Default: [-1].
dilated_group (bool): Whether to use dilated ball query.
Default: False.
norm_cfg (dict): Type of normalization method.
Default: dict(type='BN2d').
use_xyz (bool): Whether to use xyz.
Default: True.
pool_mod (str): Type of pooling method.
Default: 'max'.
normalize_xyz (bool): Whether to normalize local XYZ with radius.
Default: False.
bias (bool | str): If specified as `auto`, it will be decided by the
norm_cfg. Bias will be set as True if `norm_cfg` is None, otherwise
False. Default: "auto".
"""
def __init__(self,
num_point,
radii,
sample_nums,
mlp_channels,
fps_mod=['D-FPS'],
fps_sample_range_list=[-1],
dilated_group=False,
norm_cfg=dict(type='BN2d'),
use_xyz=True,
pool_mod='max',
normalize_xyz=False,
bias='auto'):
super(PointSAModuleMSG, self).__init__(
num_point=num_point,
radii=radii,
sample_nums=sample_nums,
mlp_channels=mlp_channels,
fps_mod=fps_mod,
fps_sample_range_list=fps_sample_range_list,
dilated_group=dilated_group,
use_xyz=use_xyz,
pool_mod=pool_mod,
normalize_xyz=normalize_xyz)
for i in range(len(self.mlp_channels)):
mlp_channel = self.mlp_channels[i]
if use_xyz:
mlp_channel[0] += 3
mlp = nn.Sequential()
for i in range(len(mlp_channel) - 1):
mlp.add_module(
f'layer{i}',
ConvModule(
mlp_channel[i],
mlp_channel[i + 1],
kernel_size=(1, 1),
stride=(1, 1),
conv_cfg=dict(type='Conv2d'),
norm_cfg=norm_cfg,
bias=bias))
self.mlps.append(mlp)
@SA_MODULES.register_module()
class PointSAModule(PointSAModuleMSG):
"""Point set abstraction module used in Pointnets.
"""Point set abstraction module with single-scale grouping (SSG) used in
PointNets.
Args:
mlp_channels (list[int]): Specify the pointnet channels before
......@@ -207,17 +318,17 @@ class PointSAModule(PointSAModuleMSG):
"""
def __init__(self,
mlp_channels: List[int],
num_point: int = None,
radius: float = None,
num_sample: int = None,
norm_cfg: dict = dict(type='BN2d'),
use_xyz: bool = True,
pool_mod: str = 'max',
fps_mod: List[str] = ['D-FPS'],
fps_sample_range_list: List[int] = [-1],
normalize_xyz: bool = False):
super().__init__(
mlp_channels,
num_point=None,
radius=None,
num_sample=None,
norm_cfg=dict(type='BN2d'),
use_xyz=True,
pool_mod='max',
fps_mod=['D-FPS'],
fps_sample_range_list=[-1],
normalize_xyz=False):
super(PointSAModule, self).__init__(
mlp_channels=[mlp_channels],
num_point=num_point,
radii=[radius],
......
import numpy as np
import pytest
import torch
def test_paconv_sa_module_msg():
if not torch.cuda.is_available():
pytest.skip()
from mmdet3d.ops import PAConvSAModuleMSG
# paconv_num_kernels should have same length as mlp_channels
with pytest.raises(AssertionError):
self = PAConvSAModuleMSG(
num_point=16,
radii=[0.2, 0.4],
sample_nums=[4, 8],
mlp_channels=[[12, 16], [12, 32]],
paconv_num_kernels=[[4]]).cuda()
# inner lengths of paconv_num_kernels should match those of mlp_channels
with pytest.raises(AssertionError):
self = PAConvSAModuleMSG(
num_point=16,
radii=[0.2, 0.4],
sample_nums=[4, 8],
mlp_channels=[[12, 16], [12, 32]],
paconv_num_kernels=[[4, 4], [8, 8]]).cuda()
self = PAConvSAModuleMSG(
num_point=16,
radii=[0.2, 0.4],
sample_nums=[4, 8],
mlp_channels=[[12, 16], [12, 32]],
paconv_num_kernels=[[4], [8]],
norm_cfg=dict(type='BN2d'),
use_xyz=False,
pool_mod='max',
paconv_kernel_input='w_neighbor').cuda()
assert self.mlps[0].layer0.weight_bank.shape[0] == 12 * 2
assert self.mlps[0].layer0.weight_bank.shape[1] == 16 * 4
assert self.mlps[1].layer0.weight_bank.shape[0] == 12 * 2
assert self.mlps[1].layer0.weight_bank.shape[1] == 32 * 8
assert self.mlps[0].layer0.bn.num_features == 16
assert self.mlps[1].layer0.bn.num_features == 32
assert self.mlps[0].layer0.scorenet.mlps.layer0.conv.in_channels == 7
assert self.mlps[0].layer0.scorenet.mlps.layer3.conv.out_channels == 4
assert self.mlps[1].layer0.scorenet.mlps.layer0.conv.in_channels == 7
assert self.mlps[1].layer0.scorenet.mlps.layer3.conv.out_channels == 8
# last conv in ScoreNet has neither bn nor relu
with pytest.raises(AttributeError):
_ = self.mlps[0].layer0.scorenet.mlps.layer3.bn
with pytest.raises(AttributeError):
_ = self.mlps[0].layer0.scorenet.mlps.layer3.activate
xyz = np.fromfile('tests/data/sunrgbd/points/000001.bin', np.float32)
# (B, N, 3)
xyz = torch.from_numpy(xyz).view(1, -1, 3).cuda()
# (B, C, N)
features = xyz.repeat([1, 1, 4]).transpose(1, 2).contiguous().cuda()
# test forward
new_xyz, new_features, inds = self(xyz, features)
assert new_xyz.shape == torch.Size([1, 16, 3])
assert new_features.shape == torch.Size([1, 48, 16])
assert inds.shape == torch.Size([1, 16])
# test with identity kernel input
self = PAConvSAModuleMSG(
num_point=16,
radii=[0.2, 0.4],
sample_nums=[4, 8],
mlp_channels=[[12, 16], [12, 32]],
paconv_num_kernels=[[4], [8]],
norm_cfg=dict(type='BN2d'),
use_xyz=False,
pool_mod='max',
paconv_kernel_input='identity').cuda()
assert self.mlps[0].layer0.weight_bank.shape[0] == 12 * 1
assert self.mlps[0].layer0.weight_bank.shape[1] == 16 * 4
assert self.mlps[1].layer0.weight_bank.shape[0] == 12 * 1
assert self.mlps[1].layer0.weight_bank.shape[1] == 32 * 8
xyz = np.fromfile('tests/data/sunrgbd/points/000001.bin', np.float32)
# (B, N, 3)
xyz = torch.from_numpy(xyz).view(1, -1, 3).cuda()
# (B, C, N)
features = xyz.repeat([1, 1, 4]).transpose(1, 2).contiguous().cuda()
# test forward
new_xyz, new_features, inds = self(xyz, features)
assert new_xyz.shape == torch.Size([1, 16, 3])
assert new_features.shape == torch.Size([1, 48, 16])
assert inds.shape == torch.Size([1, 16])
def test_paconv_sa_module():
if not torch.cuda.is_available():
pytest.skip()
from mmdet3d.ops import build_sa_module
sa_cfg = dict(
type='PAConvSAModule',
num_point=16,
radius=0.2,
num_sample=8,
mlp_channels=[12, 32],
paconv_num_kernels=[8],
norm_cfg=dict(type='BN2d'),
use_xyz=True,
pool_mod='max',
paconv_kernel_input='w_neighbor')
self = build_sa_module(sa_cfg).cuda()
assert self.mlps[0].layer0.weight_bank.shape[0] == 15 * 2
assert self.mlps[0].layer0.weight_bank.shape[1] == 32 * 8
xyz = np.fromfile('tests/data/sunrgbd/points/000001.bin', np.float32)
# (B, N, 3)
xyz = torch.from_numpy(xyz[..., :3]).view(1, -1, 3).cuda()
# (B, C, N)
features = xyz.repeat([1, 1, 4]).transpose(1, 2).contiguous().cuda()
# test forward
new_xyz, new_features, inds = self(xyz, features)
assert new_xyz.shape == torch.Size([1, 16, 3])
assert new_features.shape == torch.Size([1, 32, 16])
assert inds.shape == torch.Size([1, 16])
# test kNN sampling when radius is None
sa_cfg = dict(
type='PAConvSAModule',
num_point=16,
radius=None,
num_sample=8,
mlp_channels=[12, 32],
paconv_num_kernels=[8],
norm_cfg=dict(type='BN2d'),
use_xyz=True,
pool_mod='max',
paconv_kernel_input='identity')
self = build_sa_module(sa_cfg).cuda()
assert self.mlps[0].layer0.weight_bank.shape[0] == 15 * 1
xyz = np.fromfile('tests/data/sunrgbd/points/000001.bin', np.float32)
xyz = torch.from_numpy(xyz[..., :3]).view(1, -1, 3).cuda()
features = xyz.repeat([1, 1, 4]).transpose(1, 2).contiguous().cuda()
new_xyz, new_features, inds = self(xyz, features)
assert new_xyz.shape == torch.Size([1, 16, 3])
assert new_features.shape == torch.Size([1, 32, 16])
assert inds.shape == torch.Size([1, 16])
def test_paconv_cuda_sa_module_msg():
if not torch.cuda.is_available():
pytest.skip()
from mmdet3d.ops import PAConvCUDASAModuleMSG
# paconv_num_kernels should have same length as mlp_channels
with pytest.raises(AssertionError):
self = PAConvCUDASAModuleMSG(
num_point=16,
radii=[0.2, 0.4],
sample_nums=[4, 8],
mlp_channels=[[12, 16], [12, 32]],
paconv_num_kernels=[[4]]).cuda()
# inner lengths of paconv_num_kernels should match those of mlp_channels
with pytest.raises(AssertionError):
self = PAConvCUDASAModuleMSG(
num_point=16,
radii=[0.2, 0.4],
sample_nums=[4, 8],
mlp_channels=[[12, 16], [12, 32]],
paconv_num_kernels=[[4, 4], [8, 8]]).cuda()
self = PAConvCUDASAModuleMSG(
num_point=16,
radii=[0.2, 0.4],
sample_nums=[4, 8],
mlp_channels=[[12, 16], [12, 32]],
paconv_num_kernels=[[4], [8]],
norm_cfg=dict(type='BN2d'),
use_xyz=False,
pool_mod='max',
paconv_kernel_input='w_neighbor').cuda()
assert self.mlps[0][0].weight_bank.shape[0] == 12 * 2
assert self.mlps[0][0].weight_bank.shape[1] == 16 * 4
assert self.mlps[1][0].weight_bank.shape[0] == 12 * 2
assert self.mlps[1][0].weight_bank.shape[1] == 32 * 8
assert self.mlps[0][0].bn.num_features == 16
assert self.mlps[1][0].bn.num_features == 32
assert self.mlps[0][0].scorenet.mlps.layer0.conv.in_channels == 7
assert self.mlps[0][0].scorenet.mlps.layer3.conv.out_channels == 4
assert self.mlps[1][0].scorenet.mlps.layer0.conv.in_channels == 7
assert self.mlps[1][0].scorenet.mlps.layer3.conv.out_channels == 8
# last conv in ScoreNet has neither bn nor relu
with pytest.raises(AttributeError):
_ = self.mlps[0][0].scorenet.mlps.layer3.bn
with pytest.raises(AttributeError):
_ = self.mlps[0][0].scorenet.mlps.layer3.activate
xyz = np.fromfile('tests/data/sunrgbd/points/000001.bin', np.float32)
# (B, N, 3)
xyz = torch.from_numpy(xyz).view(1, -1, 3).cuda()
# (B, C, N)
features = xyz.repeat([1, 1, 4]).transpose(1, 2).contiguous().cuda()
# test forward
new_xyz, new_features, inds = self(xyz, features)
assert new_xyz.shape == torch.Size([1, 16, 3])
assert new_features.shape == torch.Size([1, 48, 16])
assert inds.shape == torch.Size([1, 16])
# CUDA PAConv only supports w_neighbor kernel_input
with pytest.raises(AssertionError):
self = PAConvCUDASAModuleMSG(
num_point=16,
radii=[0.2, 0.4],
sample_nums=[4, 8],
mlp_channels=[[12, 16], [12, 32]],
paconv_num_kernels=[[4], [8]],
norm_cfg=dict(type='BN2d'),
use_xyz=False,
pool_mod='max',
paconv_kernel_input='identity').cuda()
def test_paconv_cuda_sa_module():
if not torch.cuda.is_available():
pytest.skip()
from mmdet3d.ops import build_sa_module
sa_cfg = dict(
type='PAConvCUDASAModule',
num_point=16,
radius=0.2,
num_sample=8,
mlp_channels=[12, 32],
paconv_num_kernels=[8],
norm_cfg=dict(type='BN2d'),
use_xyz=True,
pool_mod='max',
paconv_kernel_input='w_neighbor')
self = build_sa_module(sa_cfg).cuda()
assert self.mlps[0][0].weight_bank.shape[0] == 15 * 2
assert self.mlps[0][0].weight_bank.shape[1] == 32 * 8
xyz = np.fromfile('tests/data/sunrgbd/points/000001.bin', np.float32)
# (B, N, 3)
xyz = torch.from_numpy(xyz[..., :3]).view(1, -1, 3).cuda()
# (B, C, N)
features = xyz.repeat([1, 1, 4]).transpose(1, 2).contiguous().cuda()
# test forward
new_xyz, new_features, inds = self(xyz, features)
assert new_xyz.shape == torch.Size([1, 16, 3])
assert new_features.shape == torch.Size([1, 32, 16])
assert inds.shape == torch.Size([1, 16])
# test kNN sampling when radius is None
sa_cfg = dict(
type='PAConvCUDASAModule',
num_point=16,
radius=None,
num_sample=8,
mlp_channels=[12, 32],
paconv_num_kernels=[8],
norm_cfg=dict(type='BN2d'),
use_xyz=True,
pool_mod='max',
paconv_kernel_input='w_neighbor')
self = build_sa_module(sa_cfg).cuda()
xyz = np.fromfile('tests/data/sunrgbd/points/000001.bin', np.float32)
xyz = torch.from_numpy(xyz[..., :3]).view(1, -1, 3).cuda()
features = xyz.repeat([1, 1, 4]).transpose(1, 2).contiguous().cuda()
new_xyz, new_features, inds = self(xyz, features)
assert new_xyz.shape == torch.Size([1, 16, 3])
assert new_features.shape == torch.Size([1, 32, 16])
assert inds.shape == torch.Size([1, 16])
......@@ -199,7 +199,7 @@ def test_paconv():
paconv = PAConv(in_channels, out_channels, 4)
with torch.no_grad():
new_features = paconv(points_xyz, features)
new_features, _ = paconv((features, points_xyz))
assert new_features.shape == torch.Size([B, out_channels, npoint, K])
......@@ -220,6 +220,6 @@ def test_paconv_cuda():
paconv = PAConvCUDA(in_channels, out_channels, 4).cuda()
with torch.no_grad():
new_features = paconv(points_xyz, features, points_idx)
new_features, _, _ = paconv((features, points_xyz, points_idx))
assert new_features.shape == torch.Size([B, out_channels, npoint, K])