Commit e0d892c7 authored by liyinhao

Merge branch 'master_temp' into indoor_loading

# Conflicts:
#	tools/data_converter/sunrgbd_data_utils.py
parents 929ebfe8 f584b970
// Modified from
// https://github.com/sshaoshuai/PCDet/blob/master/pcdet/ops/roiaware_pool3d/src/roiaware_pool3d_kernel.cu
// Points in boxes gpu
// Written by Shaoshuai Shi
// All Rights Reserved 2019.
#include <assert.h>
#include <math.h>
#include <stdio.h>
#include <torch/extension.h>
#include <torch/serialize/tensor.h>
#define THREADS_PER_BLOCK 256
#define DIVUP(m, n) ((m) / (n) + ((m) % (n) > 0))
#define CHECK_CUDA(x) \
TORCH_CHECK(x.device().is_cuda(), #x, " must be a CUDAtensor ")
#define CHECK_CONTIGUOUS(x) \
TORCH_CHECK(x.is_contiguous(), #x, " must be contiguous ")
#define CHECK_INPUT(x) \
CHECK_CUDA(x); \
CHECK_CONTIGUOUS(x)
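As a worked example of the launch arithmetic: DIVUP is a ceiling division, so with pts_num = 1000 and THREADS_PER_BLOCK = 256 it gives 1000/256 + (1000 % 256 > 0) = 3 + 1 = 4 blocks per batch sample; the surplus threads of the last block are presumably masked by the pt_idx bounds check inside the collapsed portion of the kernel body below.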
// #define DEBUG
__device__ inline void lidar_to_local_coords(float shift_x, float shift_y,
float rz, float &local_x,
float &local_y) {
// should rotate pi/2 + alpha to translate LiDAR to local
float rot_angle = rz + M_PI / 2;
float cosa = cos(rot_angle), sina = sin(rot_angle);
@@ -29,10 +32,11 @@ __device__ inline void lidar_to_local_coords(float shift_x, float shift_y, float
local_y = shift_x * sina + shift_y * cosa;
}
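Note the quarter turn: the offset (shift_x, shift_y) is rotated by rz + π/2 rather than rz alone, matching the "rotate pi/2 + alpha" comment above. After this transform, check_pt_in_box3d below compares local_x against the half-length l/2 and local_y against the half-width w/2, so the extra π/2 absorbs the (w, l) axis ordering of the box encoding.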
__device__ inline int check_pt_in_box3d(const float *pt, const float *box3d,
float &local_x, float &local_y) {
// param pt: (x, y, z)
// param box3d: (cx, cy, cz, w, l, h, rz) in LiDAR coordinate, cz in the
// bottom center
float x = pt[0], y = pt[1], z = pt[2];
float cx = box3d[0], cy = box3d[1], cz = box3d[2];
float w = box3d[3], l = box3d[4], h = box3d[5], rz = box3d[6];
@@ -40,16 +44,19 @@ __device__ inline int check_pt_in_box3d(const float *pt, const float *box3d, flo
if (fabsf(z - cz) > h / 2.0) return 0;
lidar_to_local_coords(x - cx, y - cy, rz, local_x, local_y);
float in_flag = (local_x > -l / 2.0) & (local_x < l / 2.0) &
(local_y > -w / 2.0) & (local_y < w / 2.0);
return in_flag;
}
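For quick CPU-side sanity checks, the same membership test can be mirrored in NumPy. This is only a sketch, not part of the extension: it assumes the collapsed hunk above shifts cz from the bottom center to the geometric center (cz += h / 2) before the height test, and it reconstructs the collapsed local_x line as the standard rotation counterpart of the visible local_y line.

import numpy as np

def check_pt_in_box3d_np(pt, box3d):
    x, y, z = pt
    cx, cy, cz, w, l, h, rz = box3d
    cz += h / 2.0  # assumed hidden step: bottom center -> geometric center
    if abs(z - cz) > h / 2.0:
        return False
    rot = rz + np.pi / 2  # same pi/2 + alpha rotation as lidar_to_local_coords
    cosa, sina = np.cos(rot), np.sin(rot)
    local_x = (x - cx) * cosa - (y - cy) * sina  # reconstructed hidden line
    local_y = (x - cx) * sina + (y - cy) * cosa  # matches the visible line
    return (-l / 2.0 < local_x < l / 2.0) and (-w / 2.0 < local_y < w / 2.0)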
__global__ void points_in_boxes_kernel(int batch_size, int boxes_num,
int pts_num, const float *boxes,
const float *pts,
int *box_idx_of_points) {
// params boxes: (B, N, 7) [x, y, z, w, l, h, rz] in LiDAR coordinate, z is
// the bottom center, boxes DO NOT overlap
// params pts: (B, npoints, 3) [x, y, z] in LiDAR coordinate
// params box_idx_of_points: (B, npoints), default -1
int bs_idx = blockIdx.y;
int pt_idx = blockIdx.x * blockDim.x + threadIdx.x;
@@ -61,26 +68,28 @@ __global__ void points_in_boxes_kernel(int batch_size, int boxes_num, int pts_nu
float local_x = 0, local_y = 0;
int cur_in_flag = 0;
for (int k = 0; k < boxes_num; k++) {
cur_in_flag = check_pt_in_box3d(pts, boxes + k * 7, local_x, local_y);
if (cur_in_flag) {
box_idx_of_points[0] = k;
break;
}
}
}
void points_in_boxes_launcher(int batch_size, int boxes_num, int pts_num,
const float *boxes, const float *pts,
int *box_idx_of_points) {
// params boxes: (B, N, 7) [x, y, z, w, l, h, rz] in LiDAR coordinate, z is
// the bottom center, boxes DO NOT overlap
// params pts: (B, npoints, 3) [x, y, z] in LiDAR coordinate
// params box_idx_of_points: (B, npoints), default -1
cudaError_t err;
dim3 blocks(DIVUP(pts_num, THREADS_PER_BLOCK), batch_size);
dim3 threads(THREADS_PER_BLOCK);
points_in_boxes_kernel<<<blocks, threads>>>(batch_size, boxes_num, pts_num,
boxes, pts, box_idx_of_points);
err = cudaGetLastError();
if (cudaSuccess != err) {
@@ -93,10 +102,12 @@ void points_in_boxes_launcher(int batch_size, int boxes_num, int pts_num, const
#endif
}
int points_in_boxes_gpu(at::Tensor boxes_tensor, at::Tensor pts_tensor,
at::Tensor box_idx_of_points_tensor) {
// params boxes: (B, N, 7) [x, y, z, w, l, h, rz] in LiDAR coordinate, z is
// the bottom center, boxes DO NOT overlap
// params pts: (B, npoints, 3) [x, y, z] in LiDAR coordinate
// params box_idx_of_points: (B, npoints), default -1
CHECK_INPUT(boxes_tensor);
CHECK_INPUT(pts_tensor);
@@ -106,11 +117,12 @@ int points_in_boxes_gpu(at::Tensor boxes_tensor, at::Tensor pts_tensor, at::Tens
int boxes_num = boxes_tensor.size(1);
int pts_num = pts_tensor.size(1);
const float *boxes = boxes_tensor.data_ptr<float>();
const float *pts = pts_tensor.data_ptr<float>();
int *box_idx_of_points = box_idx_of_points_tensor.data_ptr<int>();
points_in_boxes_launcher(batch_size, boxes_num, pts_num, boxes, pts,
box_idx_of_points);
return 1;
}
// Modified from
// https://github.com/sshaoshuai/PCDet/blob/master/pcdet/ops/roiaware_pool3d/src/roiaware_pool3d_kernel.cu
// RoI-aware point cloud feature pooling
// Written by Shaoshuai Shi
// All Rights Reserved 2019.
#include <assert.h>
#include <torch/extension.h>
#include <torch/serialize/tensor.h>
#define CHECK_CUDA(x) \
TORCH_CHECK(x.device().is_cuda(), #x, " must be a CUDAtensor ")
#define CHECK_CONTIGUOUS(x) \
TORCH_CHECK(x.is_contiguous(), #x, " must be contiguous ")
#define CHECK_INPUT(x) \
CHECK_CUDA(x); \
CHECK_CONTIGUOUS(x)
void roiaware_pool3d_launcher(int boxes_num, int pts_num, int channels,
int max_pts_each_voxel, int out_x, int out_y,
int out_z, const float *rois, const float *pts,
const float *pts_feature, int *argmax,
int *pts_idx_of_voxels, float *pooled_features,
int pool_method);
void roiaware_pool3d_backward_launcher(int boxes_num, int out_x, int out_y,
int out_z, int channels,
int max_pts_each_voxel,
const int *pts_idx_of_voxels,
const int *argmax, const float *grad_out,
float *grad_in, int pool_method);
int roiaware_pool3d_gpu(at::Tensor rois, at::Tensor pts, at::Tensor pts_feature,
at::Tensor argmax, at::Tensor pts_idx_of_voxels,
at::Tensor pooled_features, int pool_method);
int roiaware_pool3d_gpu_backward(at::Tensor pts_idx_of_voxels,
at::Tensor argmax, at::Tensor grad_out,
at::Tensor grad_in, int pool_method);
int points_in_boxes_cpu(at::Tensor boxes_tensor, at::Tensor pts_tensor,
at::Tensor pts_indices_tensor);
int points_in_boxes_gpu(at::Tensor boxes_tensor, at::Tensor pts_tensor,
at::Tensor box_idx_of_points_tensor);
int roiaware_pool3d_gpu(at::Tensor rois, at::Tensor pts, at::Tensor pts_feature,
at::Tensor argmax, at::Tensor pts_idx_of_voxels,
at::Tensor pooled_features, int pool_method) {
// params rois: (N, 7) [x, y, z, w, l, h, ry] in LiDAR coordinate
// params pts: (npoints, 3) [x, y, z] in LiDAR coordinate
// params pts_feature: (npoints, C)
@@ -56,22 +69,27 @@ int roiaware_pool3d_gpu(at::Tensor rois, at::Tensor pts, at::Tensor pts_feature,
int out_x = pts_idx_of_voxels.size(1);
int out_y = pts_idx_of_voxels.size(2);
int out_z = pts_idx_of_voxels.size(3);
assert((out_x < 256) && (out_y < 256) &&
(out_z < 256)); // we encode index with 8bit
const float *rois_data = rois.data_ptr<float>();
const float *pts_data = pts.data_ptr<float>();
const float *pts_feature_data = pts_feature.data_ptr<float>();
int *argmax_data = argmax.data_ptr<int>();
int *pts_idx_of_voxels_data = pts_idx_of_voxels.data_ptr<int>();
float *pooled_features_data = pooled_features.data_ptr<float>();
roiaware_pool3d_launcher(
boxes_num, pts_num, channels, max_pts_each_voxel, out_x, out_y, out_z,
rois_data, pts_data, pts_feature_data, argmax_data,
pts_idx_of_voxels_data, pooled_features_data, pool_method);
return 1;
}
int roiaware_pool3d_gpu_backward(at::Tensor pts_idx_of_voxels,
at::Tensor argmax, at::Tensor grad_out,
at::Tensor grad_in, int pool_method) {
// params pts_idx_of_voxels: (N, out_x, out_y, out_z, max_pts_each_voxel)
// params argmax: (N, out_x, out_y, out_z, C)
// params grad_out: (N, out_x, out_y, out_z, C)
@@ -90,20 +108,25 @@ int roiaware_pool3d_gpu_backward(at::Tensor pts_idx_of_voxels, at::Tensor argmax
int max_pts_each_voxel = pts_idx_of_voxels.size(4); // index 0 is the counter
int channels = grad_out.size(4);
const int *pts_idx_of_voxels_data = pts_idx_of_voxels.data_ptr<int>();
const int *argmax_data = argmax.data_ptr<int>();
const float *grad_out_data = grad_out.data_ptr<float>();
float *grad_in_data = grad_in.data_ptr<float>();
roiaware_pool3d_backward_launcher(boxes_num, out_x, out_y, out_z, channels,
max_pts_each_voxel, pts_idx_of_voxels_data,
argmax_data, grad_out_data, grad_in_data,
pool_method);
return 1;
}
PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
m.def("forward", &roiaware_pool3d_gpu, "roiaware pool3d forward (CUDA)");
m.def("backward", &roiaware_pool3d_gpu_backward, "roiaware pool3d backward (CUDA)");
m.def("points_in_boxes_gpu", &points_in_boxes_gpu, "points_in_boxes_gpu forward (CUDA)");
m.def("points_in_boxes_cpu", &points_in_boxes_cpu, "points_in_boxes_cpu forward (CPU)");
m.def("backward", &roiaware_pool3d_gpu_backward,
"roiaware pool3d backward (CUDA)");
m.def("points_in_boxes_gpu", &points_in_boxes_gpu,
"points_in_boxes_gpu forward (CUDA)");
m.def("points_in_boxes_cpu", &points_in_boxes_cpu,
"points_in_boxes_cpu forward (CPU)");
}
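A hedged usage sketch of these bindings. The compiled module name (roiaware_pool3d_ext) and its import path are assumptions for illustration only; in practice the ops are reached through the mmdet3d.ops wrappers.

import torch
from mmdet3d.ops.roiaware_pool3d import roiaware_pool3d_ext  # hypothetical name

boxes = torch.rand(2, 16, 7).cuda()  # (B, N, 7) [x, y, z, w, l, h, rz]
pts = torch.rand(2, 2048, 3).cuda()  # (B, npoints, 3) in LiDAR coordinates
box_idx = pts.new_full((2, 2048), -1, dtype=torch.int32)  # default -1

roiaware_pool3d_ext.points_in_boxes_gpu(boxes, pts, box_idx)
# box_idx[b, i] now holds the index of the first box containing point i,
# or -1 if the point lies outside every box.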
from mmcv.cnn import build_norm_layer
from torch import nn
import mmdet3d.ops.spconv as spconv
from mmdet.models.backbones.resnet import BasicBlock, Bottleneck
def conv3x3(in_planes, out_planes, stride=1, indice_key=None):
"""3x3 submanifold sparse convolution with padding.
Args:
in_planes (int): the number of input channels
out_planes (int): the number of output channels
stride (int): the stride of convolution
indice_key (str): the indice key used for sparse tensor
Returns:
spconv.conv.SubMConv3d: 3x3 submanifold sparse convolution ops
"""
# TODO: deprecate this class
return spconv.SubMConv3d(
in_planes,
out_planes,
kernel_size=3,
stride=stride,
padding=1,
bias=False,
indice_key=indice_key)
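For illustration, a submanifold conv built this way shares its sparsity pattern with any other conv created under the same indice key (assuming a working spconv installation):

conv = conv3x3(16, 32, indice_key='subm1')  # reuses indices tagged 'subm1'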
def conv1x1(in_planes, out_planes, stride=1, indice_key=None):
"""1x1 submanifold sparse convolution with padding.
Args:
in_planes (int): the number of input channels
out_planes (int): the number of output channels
stride (int): the stride of convolution
indice_key (str): the indice key used for sparse tensor
Returns:
spconv.conv.SubMConv3d: 1x1 submanifold sparse convolution ops
"""
# TODO: deprecate this class
return spconv.SubMConv3d(
in_planes,
out_planes,
kernel_size=1,
stride=stride,
padding=1,
bias=False,
indice_key=indice_key)
class SparseBasicBlockV0(spconv.SparseModule):
expansion = 1
def __init__(self,
inplanes,
planes,
stride=1,
downsample=None,
indice_key=None,
norm_cfg=None):
"""Sparse basic block for PartA^2.
Sparse basic block implemented with submanifold sparse convolution.
"""
# TODO: deprecate this class
super().__init__()
self.conv1 = conv3x3(inplanes, planes, stride, indice_key=indice_key)
norm_name1, norm_layer1 = build_norm_layer(norm_cfg, planes)
self.bn1 = norm_layer1
self.relu = nn.ReLU()
self.conv2 = conv3x3(planes, planes, indice_key=indice_key)
norm_name2, norm_layer2 = build_norm_layer(norm_cfg, planes)
self.bn2 = norm_layer2
self.downsample = downsample
self.stride = stride
def forward(self, x):
identity = x.features
assert x.features.dim() == 2, f'x.features.dim()={x.features.dim()}'
out = self.conv1(x)
out.features = self.bn1(out.features)
out.features = self.relu(out.features)
out = self.conv2(out)
out.features = self.bn2(out.features)
if self.downsample is not None:
identity = self.downsample(x)
out.features += identity
out.features = self.relu(out.features)
return out
class SparseBottleneckV0(spconv.SparseModule):
expansion = 4
def __init__(self,
inplanes,
planes,
stride=1,
downsample=None,
indice_key=None,
norm_fn=None):
"""Sparse bottleneck block for PartA^2.
Bottleneck block implemented with submanifold sparse convolution.
"""
# TODO: deprecate this class
super().__init__()
self.conv1 = conv1x1(inplanes, planes, indice_key=indice_key)
self.bn1 = norm_fn(planes)
self.conv2 = conv3x3(planes, planes, stride, indice_key=indice_key)
self.bn2 = norm_fn(planes)
self.conv3 = conv1x1(
planes, planes * self.expansion, indice_key=indice_key)
self.bn3 = norm_fn(planes * self.expansion)
self.relu = nn.ReLU()
self.downsample = downsample
self.stride = stride
def forward(self, x):
identity = x.features
out = self.conv1(x)
out.features = self.bn1(out.features)
out.features = self.relu(out.features)
out = self.conv2(out)
out.features = self.bn2(out.features)
out.features = self.relu(out.features)
out = self.conv3(out)
out.features = self.bn3(out.features)
if self.downsample is not None:
identity = self.downsample(x)
out.features += identity
out.features = self.relu(out.features)
return out
class SparseBottleneck(Bottleneck, spconv.SparseModule):
expansion = 4
def __init__(self,
inplanes,
planes,
stride=1,
downsample=None,
conv_cfg=None,
norm_cfg=None):
"""Sparse bottleneck block for PartA^2.
Bottleneck block implemented with submanifold sparse convolution.
"""
spconv.SparseModule.__init__(self)
Bottleneck.__init__(
self,
inplanes,
planes,
stride=stride,
downsample=downsample,
conv_cfg=conv_cfg,
norm_cfg=norm_cfg)
def forward(self, x):
identity = x.features
out = self.conv1(x)
out.features = self.bn1(out.features)
out.features = self.relu(out.features)
out = self.conv2(out)
out.features = self.bn2(out.features)
out.features = self.relu(out.features)
out = self.conv3(out)
out.features = self.bn3(out.features)
if self.downsample is not None:
identity = self.downsample(x)
out.features += identity
out.features = self.relu(out.features)
return out
class SparseBasicBlock(BasicBlock, spconv.SparseModule):
expansion = 1
def __init__(self,
inplanes,
planes,
stride=1,
downsample=None,
conv_cfg=None,
norm_cfg=None):
"""Sparse basic block for PartA^2.
Sparse basic block implemented with submanifold sparse convolution.
"""
spconv.SparseModule.__init__(self)
BasicBlock.__init__(
self,
inplanes,
planes,
stride=stride,
downsample=downsample,
conv_cfg=conv_cfg,
norm_cfg=norm_cfg)
def forward(self, x):
identity = x.features
assert x.features.dim() == 2, f'x.features.dim()={x.features.dim()}'
out = self.conv1(x)
out.features = self.norm1(out.features)
out.features = self.relu(out.features)
out = self.conv2(out)
out.features = self.norm2(out.features)
if self.downsample is not None:
identity = self.downsample(x)
out.features += identity
out.features = self.relu(out.features)
return out
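A minimal sketch of driving SparseBasicBlock end to end, assuming spconv's SparseConvTensor API (features, indices, spatial_shape, batch_size) and the SubMConv3d registry entry added further below; shapes and configs are illustrative only.

import torch
import mmdet3d.ops.spconv as spconv

features = torch.randn(100, 16).cuda()           # (num_active, C)
coords = torch.zeros(100, 4, dtype=torch.int32)  # (num_active, 4): batch, z, y, x
coords[:, 1:] = torch.randint(0, 40, (100, 3), dtype=torch.int32)
x = spconv.SparseConvTensor(features, coords.cuda(), [41, 1600, 1408], batch_size=1)

block = SparseBasicBlock(
    16,
    16,
    conv_cfg=dict(type='SubMConv3d', indice_key='subm1'),
    norm_cfg=dict(type='BN1d', eps=1e-3, momentum=0.01)).cuda()
out = block(x)  # a SparseConvTensor; out.features has shape (num_active, 16)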
@@ -16,6 +16,7 @@ import math
import numpy as np
import torch
from mmcv.cnn import CONV_LAYERS
from torch.nn import init
from torch.nn.parameter import Parameter
@@ -205,6 +206,7 @@ class SparseConvolution(SparseModule):
return out_tensor
@CONV_LAYERS.register_module()
class SparseConv2d(SparseConvolution):
def __init__(self,
@@ -230,6 +232,7 @@ class SparseConv2d(SparseConvolution):
indice_key=indice_key)
@CONV_LAYERS.register_module()
class SparseConv3d(SparseConvolution):
def __init__(self,
@@ -255,6 +258,7 @@ class SparseConv3d(SparseConvolution):
indice_key=indice_key)
@CONV_LAYERS.register_module()
class SparseConv4d(SparseConvolution):
def __init__(self,
@@ -280,6 +284,7 @@ class SparseConv4d(SparseConvolution):
indice_key=indice_key)
@CONV_LAYERS.register_module()
class SparseConvTranspose2d(SparseConvolution):
def __init__(self,
@@ -306,6 +311,7 @@ class SparseConvTranspose2d(SparseConvolution):
indice_key=indice_key)
@CONV_LAYERS.register_module()
class SparseConvTranspose3d(SparseConvolution):
def __init__(self,
@@ -332,6 +338,7 @@ class SparseConvTranspose3d(SparseConvolution):
indice_key=indice_key)
@CONV_LAYERS.register_module()
class SparseInverseConv2d(SparseConvolution):
def __init__(self,
@@ -350,6 +357,7 @@ class SparseInverseConv2d(SparseConvolution):
indice_key=indice_key)
@CONV_LAYERS.register_module()
class SparseInverseConv3d(SparseConvolution):
def __init__(self,
@@ -368,6 +376,7 @@ class SparseInverseConv3d(SparseConvolution):
indice_key=indice_key)
@CONV_LAYERS.register_module()
class SubMConv2d(SparseConvolution):
def __init__(self,
@@ -394,6 +403,7 @@ class SubMConv2d(SparseConvolution):
indice_key=indice_key)
@CONV_LAYERS.register_module()
class SubMConv3d(SparseConvolution):
def __init__(self,
@@ -420,6 +430,7 @@ class SubMConv3d(SparseConvolution):
indice_key=indice_key)
@CONV_LAYERS.register_module()
class SubMConv4d(SparseConvolution):
def __init__(self,
......
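The point of the @CONV_LAYERS.register_module() decorators added throughout this file is that mmcv's build_conv_layer can then instantiate sparse convs from plain config dicts, which is how the sparse blocks above receive their conv_cfg. A sketch, assuming mmcv is installed:

from mmcv.cnn import build_conv_layer

conv = build_conv_layer(
    dict(type='SubMConv3d', indice_key='subm1'),
    16, 32, 3, padding=1, bias=False)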
@@ -34,7 +34,7 @@ torch::Tensor indiceMaxPool(torch::Tensor features, torch::Tensor indicePairs,
torch::Tensor output = torch::zeros({numAct, numInPlanes}, options);
double totalTime = 0;
for (int i = 0; i < kernelVolume; ++i) {
auto nHot = indicePairNumCpu.data_ptr<int>()[i];
if (nHot <= 0) {
continue;
}
@@ -60,7 +60,8 @@ torch::Tensor indiceMaxPool(torch::Tensor features, torch::Tensor indicePairs,
template <typename T>
torch::Tensor indiceMaxPoolBackward(torch::Tensor features,
torch::Tensor outFeatures,
torch::Tensor outGrad,
torch::Tensor indicePairs,
torch::Tensor indiceNum) {
auto device = features.device().type();
auto numInPlanes = features.size(1);
@@ -70,7 +71,7 @@ torch::Tensor indiceMaxPoolBackward(torch::Tensor features,
torch::Tensor inputGrad = torch::zeros(features.sizes(), options);
auto kernelVolume = indicePairs.size(0);
for (int i = 0; i < kernelVolume; ++i) {
auto nHot = indicePairNumCpu.data_ptr<int>()[i];
if (nHot <= 0) {
continue;
}
......
@@ -13,20 +13,21 @@
// limitations under the License.
#pragma once
#include <ATen/ATen.h>
#include <ATen/cuda/CUDAContext.h>
#include <tensorview/tensorview.h>
#include <torch/script.h>
namespace tv {
struct TorchGPU : public tv::GPU {
virtual cudaStream_t getStream() const override {
return at::cuda::getCurrentCUDAStream();
}
};
template <typename T>
void check_torch_dtype(const torch::Tensor &tensor) {
switch (tensor.type().scalarType()) {
case at::ScalarType::Double: {
auto val = std::is_same<std::remove_const_t<T>, double>::value;
@@ -65,6 +66,6 @@ tv::TensorView<T> torch2tv(const torch::Tensor &tensor) {
for (auto i : tensor.sizes()) {
shape.push_back(i);
}
return tv::TensorView<T>(tensor.data_ptr<std::remove_const_t<T>>(), shape);
}
} // namespace tv
@@ -6,7 +6,7 @@
#include <ATen/cuda/CUDAApplyUtils.cuh>
#define CHECK_CUDA(x) \
TORCH_CHECK(x.device().is_cuda(), #x " must be a CUDA tensor")
#define CHECK_CONTIGUOUS(x) \
TORCH_CHECK(x.is_contiguous(), #x " must be contiguous")
#define CHECK_INPUT(x) \
@@ -177,7 +177,7 @@ std::vector<at::Tensor> dynamic_point_to_voxel_forward_gpu(
dim3 threads(threadsPerBlock);
cudaStream_t map_stream = at::cuda::getCurrentCUDAStream();
AT_DISPATCH_ALL_TYPES(
voxel_mapping.scalar_type(), "determin_duplicate", ([&] {
point_to_voxelidx_kernel<int><<<blocks, threads, 0, map_stream>>>(
voxel_mapping.data_ptr<int>(), point_to_voxelidx.data_ptr<int>(),
point_to_pointidx.data_ptr<int>(), num_points, NDim);
@@ -203,7 +203,7 @@ std::vector<at::Tensor> dynamic_point_to_voxel_forward_gpu(
voxel_mapping.options()); // must be zero from the beginning
cudaStream_t logic_stream = at::cuda::getCurrentCUDAStream();
AT_DISPATCH_ALL_TYPES(
voxel_mapping.scalar_type(), "determin_duplicate", ([&] {
determin_voxel_num<int><<<1, 1, 0, logic_stream>>>(
voxel_mapping.data_ptr<int>(), num_points_per_voxel.data_ptr<int>(),
point_to_voxelidx.data_ptr<int>(),
@@ -228,7 +228,7 @@ std::vector<at::Tensor> dynamic_point_to_voxel_forward_gpu(
dim3 cp_threads(threadsPerBlock, 4);
cudaStream_t cp_stream = at::cuda::getCurrentCUDAStream();
AT_DISPATCH_ALL_TYPES(
points.scalar_type(), "scatter_point_to_voxel", ([&] {
scatter_point_to_voxel_kernel<float, int>
<<<blocks, cp_threads, 0, cp_stream>>>(
points.data_ptr<float>(), voxel_mapping.data_ptr<int>(),
@@ -265,8 +265,8 @@ void dynamic_point_to_voxel_backward_gpu(at::Tensor& grad_input_points,
dim3 blocks(col_blocks);
dim3 cp_threads(threadsPerBlock, 4);
cudaStream_t cp_stream = at::cuda::getCurrentCUDAStream();
AT_DISPATCH_ALL_TYPES(grad_input_points.scalar_type(),
"scatter_point_to_voxel", ([&] {
map_voxel_to_point_kernel<float, int>
<<<blocks, cp_threads, 0, cp_stream>>>(
grad_input_points.data_ptr<float>(),
......