"sgl-kernel/vscode:/vscode.git/clone" did not exist on "0e78c63c0ec94b68ad28ead2bc39c93137b2dbc3"
Commit 4f1a5e52 authored by liyinhao's avatar liyinhao
Browse files

Merge branch 'master_temp' into indoor_augment

parents c2c0f3d8 f584b970
//Modified from // Modified from
//https://github.com/sshaoshuai/PCDet/blob/master/pcdet/ops/roiaware_pool3d/src/roiaware_pool3d_kernel.cu // https://github.com/sshaoshuai/PCDet/blob/master/pcdet/ops/roiaware_pool3d/src/roiaware_pool3d_kernel.cu
//Points in boxes gpu // Points in boxes gpu
//Written by Shaoshuai Shi // Written by Shaoshuai Shi
//All Rights Reserved 2019. // All Rights Reserved 2019.
#include <torch/serialize/tensor.h>
#include <torch/extension.h>
#include <assert.h> #include <assert.h>
#include <math.h> #include <math.h>
#include <stdio.h> #include <stdio.h>
#include <torch/extension.h>
#include <torch/serialize/tensor.h>
#define THREADS_PER_BLOCK 256 #define THREADS_PER_BLOCK 256
#define DIVUP(m,n) ((m) / (n) + ((m) % (n) > 0)) #define DIVUP(m, n) ((m) / (n) + ((m) % (n) > 0))
#define CHECK_CUDA(x) AT_CHECK(x.type().is_cuda(), #x, " must be a CUDAtensor ") #define CHECK_CUDA(x) \
#define CHECK_CONTIGUOUS(x) AT_CHECK(x.is_contiguous(), #x, " must be contiguous ") TORCH_CHECK(x.device().is_cuda(), #x, " must be a CUDAtensor ")
#define CHECK_INPUT(x) CHECK_CUDA(x);CHECK_CONTIGUOUS(x) #define CHECK_CONTIGUOUS(x) \
TORCH_CHECK(x.is_contiguous(), #x, " must be contiguous ")
#define CHECK_INPUT(x) \
CHECK_CUDA(x); \
CHECK_CONTIGUOUS(x)
// #define DEBUG // #define DEBUG
__device__ inline void lidar_to_local_coords(float shift_x, float shift_y,
__device__ inline void lidar_to_local_coords(float shift_x, float shift_y, float rz, float &local_x, float &local_y){ float rz, float &local_x,
float &local_y) {
// should rotate pi/2 + alpha to translate LiDAR to local // should rotate pi/2 + alpha to translate LiDAR to local
float rot_angle = rz + M_PI / 2; float rot_angle = rz + M_PI / 2;
float cosa = cos(rot_angle), sina = sin(rot_angle); float cosa = cos(rot_angle), sina = sin(rot_angle);
...@@ -29,10 +32,11 @@ __device__ inline void lidar_to_local_coords(float shift_x, float shift_y, float ...@@ -29,10 +32,11 @@ __device__ inline void lidar_to_local_coords(float shift_x, float shift_y, float
local_y = shift_x * sina + shift_y * cosa; local_y = shift_x * sina + shift_y * cosa;
} }
__device__ inline int check_pt_in_box3d(const float *pt, const float *box3d,
__device__ inline int check_pt_in_box3d(const float *pt, const float *box3d, float &local_x, float &local_y){ float &local_x, float &local_y) {
// param pt: (x, y, z) // param pt: (x, y, z)
// param box3d: (cx, cy, cz, w, l, h, rz) in LiDAR coordinate, cz in the bottom center // param box3d: (cx, cy, cz, w, l, h, rz) in LiDAR coordinate, cz in the
// bottom center
float x = pt[0], y = pt[1], z = pt[2]; float x = pt[0], y = pt[1], z = pt[2];
float cx = box3d[0], cy = box3d[1], cz = box3d[2]; float cx = box3d[0], cy = box3d[1], cz = box3d[2];
float w = box3d[3], l = box3d[4], h = box3d[5], rz = box3d[6]; float w = box3d[3], l = box3d[4], h = box3d[5], rz = box3d[6];
...@@ -40,16 +44,19 @@ __device__ inline int check_pt_in_box3d(const float *pt, const float *box3d, flo ...@@ -40,16 +44,19 @@ __device__ inline int check_pt_in_box3d(const float *pt, const float *box3d, flo
if (fabsf(z - cz) > h / 2.0) return 0; if (fabsf(z - cz) > h / 2.0) return 0;
lidar_to_local_coords(x - cx, y - cy, rz, local_x, local_y); lidar_to_local_coords(x - cx, y - cy, rz, local_x, local_y);
float in_flag = (local_x > -l / 2.0) & (local_x < l / 2.0) & (local_y > -w / 2.0) & (local_y < w / 2.0); float in_flag = (local_x > -l / 2.0) & (local_x < l / 2.0) &
(local_y > -w / 2.0) & (local_y < w / 2.0);
return in_flag; return in_flag;
} }
__global__ void points_in_boxes_kernel(int batch_size, int boxes_num,
__global__ void points_in_boxes_kernel(int batch_size, int boxes_num, int pts_num, const float *boxes, int pts_num, const float *boxes,
const float *pts, int *box_idx_of_points){ const float *pts,
// params boxes: (B, N, 7) [x, y, z, w, l, h, rz] in LiDAR coordinate, z is the bottom center, each box DO NOT overlaps int *box_idx_of_points) {
// params pts: (B, npoints, 3) [x, y, z] in LiDAR coordinate // params boxes: (B, N, 7) [x, y, z, w, l, h, rz] in LiDAR coordinate, z is
// params boxes_idx_of_points: (B, npoints), default -1 // the bottom center, each box DO NOT overlaps params pts: (B, npoints, 3) [x,
// y, z] in LiDAR coordinate params boxes_idx_of_points: (B, npoints), default
// -1
int bs_idx = blockIdx.y; int bs_idx = blockIdx.y;
int pt_idx = blockIdx.x * blockDim.x + threadIdx.x; int pt_idx = blockIdx.x * blockDim.x + threadIdx.x;
...@@ -61,26 +68,28 @@ __global__ void points_in_boxes_kernel(int batch_size, int boxes_num, int pts_nu ...@@ -61,26 +68,28 @@ __global__ void points_in_boxes_kernel(int batch_size, int boxes_num, int pts_nu
float local_x = 0, local_y = 0; float local_x = 0, local_y = 0;
int cur_in_flag = 0; int cur_in_flag = 0;
for (int k = 0; k < boxes_num; k++){ for (int k = 0; k < boxes_num; k++) {
cur_in_flag = check_pt_in_box3d(pts, boxes + k * 7, local_x, local_y); cur_in_flag = check_pt_in_box3d(pts, boxes + k * 7, local_x, local_y);
if (cur_in_flag){ if (cur_in_flag) {
box_idx_of_points[0] = k; box_idx_of_points[0] = k;
break; break;
} }
} }
} }
void points_in_boxes_launcher(int batch_size, int boxes_num, int pts_num,
void points_in_boxes_launcher(int batch_size, int boxes_num, int pts_num, const float *boxes, const float *boxes, const float *pts,
const float *pts, int *box_idx_of_points){ int *box_idx_of_points) {
// params boxes: (B, N, 7) [x, y, z, w, l, h, rz] in LiDAR coordinate, z is the bottom center, each box DO NOT overlaps // params boxes: (B, N, 7) [x, y, z, w, l, h, rz] in LiDAR coordinate, z is
// params pts: (B, npoints, 3) [x, y, z] in LiDAR coordinate // the bottom center, each box DO NOT overlaps params pts: (B, npoints, 3) [x,
// params boxes_idx_of_points: (B, npoints), default -1 // y, z] in LiDAR coordinate params boxes_idx_of_points: (B, npoints), default
// -1
cudaError_t err; cudaError_t err;
dim3 blocks(DIVUP(pts_num, THREADS_PER_BLOCK), batch_size); dim3 blocks(DIVUP(pts_num, THREADS_PER_BLOCK), batch_size);
dim3 threads(THREADS_PER_BLOCK); dim3 threads(THREADS_PER_BLOCK);
points_in_boxes_kernel<<<blocks, threads>>>(batch_size, boxes_num, pts_num, boxes, pts, box_idx_of_points); points_in_boxes_kernel<<<blocks, threads>>>(batch_size, boxes_num, pts_num,
boxes, pts, box_idx_of_points);
err = cudaGetLastError(); err = cudaGetLastError();
if (cudaSuccess != err) { if (cudaSuccess != err) {
...@@ -93,10 +102,12 @@ void points_in_boxes_launcher(int batch_size, int boxes_num, int pts_num, const ...@@ -93,10 +102,12 @@ void points_in_boxes_launcher(int batch_size, int boxes_num, int pts_num, const
#endif #endif
} }
int points_in_boxes_gpu(at::Tensor boxes_tensor, at::Tensor pts_tensor, at::Tensor box_idx_of_points_tensor){ int points_in_boxes_gpu(at::Tensor boxes_tensor, at::Tensor pts_tensor,
// params boxes: (B, N, 7) [x, y, z, w, l, h, rz] in LiDAR coordinate, z is the bottom center, each box DO NOT overlaps at::Tensor box_idx_of_points_tensor) {
// params pts: (B, npoints, 3) [x, y, z] in LiDAR coordinate // params boxes: (B, N, 7) [x, y, z, w, l, h, rz] in LiDAR coordinate, z is
// params boxes_idx_of_points: (B, npoints), default -1 // the bottom center, each box DO NOT overlaps params pts: (B, npoints, 3) [x,
// y, z] in LiDAR coordinate params boxes_idx_of_points: (B, npoints), default
// -1
CHECK_INPUT(boxes_tensor); CHECK_INPUT(boxes_tensor);
CHECK_INPUT(pts_tensor); CHECK_INPUT(pts_tensor);
...@@ -106,11 +117,12 @@ int points_in_boxes_gpu(at::Tensor boxes_tensor, at::Tensor pts_tensor, at::Tens ...@@ -106,11 +117,12 @@ int points_in_boxes_gpu(at::Tensor boxes_tensor, at::Tensor pts_tensor, at::Tens
int boxes_num = boxes_tensor.size(1); int boxes_num = boxes_tensor.size(1);
int pts_num = pts_tensor.size(1); int pts_num = pts_tensor.size(1);
const float *boxes = boxes_tensor.data<float>(); const float *boxes = boxes_tensor.data_ptr<float>();
const float *pts = pts_tensor.data<float>(); const float *pts = pts_tensor.data_ptr<float>();
int *box_idx_of_points = box_idx_of_points_tensor.data<int>(); int *box_idx_of_points = box_idx_of_points_tensor.data_ptr<int>();
points_in_boxes_launcher(batch_size, boxes_num, pts_num, boxes, pts, box_idx_of_points); points_in_boxes_launcher(batch_size, boxes_num, pts_num, boxes, pts,
box_idx_of_points);
return 1; return 1;
} }
//Modified from // Modified from
//https://github.com/sshaoshuai/PCDet/blob/master/pcdet/ops/roiaware_pool3d/src/roiaware_pool3d_kernel.cu // https://github.com/sshaoshuai/PCDet/blob/master/pcdet/ops/roiaware_pool3d/src/roiaware_pool3d_kernel.cu
//RoI-aware point cloud feature pooling // RoI-aware point cloud feature pooling
//Written by Shaoshuai Shi // Written by Shaoshuai Shi
//All Rights Reserved 2019. // All Rights Reserved 2019.
#include <torch/serialize/tensor.h>
#include <torch/extension.h>
#include <assert.h> #include <assert.h>
#include <torch/extension.h>
#include <torch/serialize/tensor.h>
#define CHECK_CUDA(x) \
#define CHECK_CUDA(x) AT_CHECK(x.type().is_cuda(), #x, " must be a CUDAtensor ") TORCH_CHECK(x.device().is_cuda(), #x, " must be a CUDAtensor ")
#define CHECK_CONTIGUOUS(x) AT_CHECK(x.is_contiguous(), #x, " must be contiguous ") #define CHECK_CONTIGUOUS(x) \
#define CHECK_INPUT(x) CHECK_CUDA(x);CHECK_CONTIGUOUS(x) TORCH_CHECK(x.is_contiguous(), #x, " must be contiguous ")
#define CHECK_INPUT(x) \
CHECK_CUDA(x); \
void roiaware_pool3d_launcher(int boxes_num, int pts_num, int channels, int max_pts_each_voxel, CHECK_CONTIGUOUS(x)
int out_x, int out_y, int out_z, const float *rois, const float *pts, const float *pts_feature,
int *argmax, int *pts_idx_of_voxels, float *pooled_features, int pool_method); void roiaware_pool3d_launcher(int boxes_num, int pts_num, int channels,
int max_pts_each_voxel, int out_x, int out_y,
void roiaware_pool3d_backward_launcher(int boxes_num, int out_x, int out_y, int out_z, int channels, int max_pts_each_voxel, int out_z, const float *rois, const float *pts,
const int *pts_idx_of_voxels, const int *argmax, const float *grad_out, float *grad_in, int pool_method); const float *pts_feature, int *argmax,
int *pts_idx_of_voxels, float *pooled_features,
int roiaware_pool3d_gpu(at::Tensor rois, at::Tensor pts, at::Tensor pts_feature, at::Tensor argmax, int pool_method);
at::Tensor pts_idx_of_voxels, at::Tensor pooled_features, int pool_method);
void roiaware_pool3d_backward_launcher(int boxes_num, int out_x, int out_y,
int roiaware_pool3d_gpu_backward(at::Tensor pts_idx_of_voxels, at::Tensor argmax, at::Tensor grad_out, int out_z, int channels,
int max_pts_each_voxel,
const int *pts_idx_of_voxels,
const int *argmax, const float *grad_out,
float *grad_in, int pool_method);
int roiaware_pool3d_gpu(at::Tensor rois, at::Tensor pts, at::Tensor pts_feature,
at::Tensor argmax, at::Tensor pts_idx_of_voxels,
at::Tensor pooled_features, int pool_method);
int roiaware_pool3d_gpu_backward(at::Tensor pts_idx_of_voxels,
at::Tensor argmax, at::Tensor grad_out,
at::Tensor grad_in, int pool_method); at::Tensor grad_in, int pool_method);
int points_in_boxes_cpu(at::Tensor boxes_tensor, at::Tensor pts_tensor, at::Tensor pts_indices_tensor); int points_in_boxes_cpu(at::Tensor boxes_tensor, at::Tensor pts_tensor,
at::Tensor pts_indices_tensor);
int points_in_boxes_gpu(at::Tensor boxes_tensor, at::Tensor pts_tensor, at::Tensor box_idx_of_points_tensor);
int points_in_boxes_gpu(at::Tensor boxes_tensor, at::Tensor pts_tensor,
at::Tensor box_idx_of_points_tensor);
int roiaware_pool3d_gpu(at::Tensor rois, at::Tensor pts, at::Tensor pts_feature, at::Tensor argmax, at::Tensor pts_idx_of_voxels, at::Tensor pooled_features, int pool_method){ int roiaware_pool3d_gpu(at::Tensor rois, at::Tensor pts, at::Tensor pts_feature,
at::Tensor argmax, at::Tensor pts_idx_of_voxels,
at::Tensor pooled_features, int pool_method) {
// params rois: (N, 7) [x, y, z, w, l, h, ry] in LiDAR coordinate // params rois: (N, 7) [x, y, z, w, l, h, ry] in LiDAR coordinate
// params pts: (npoints, 3) [x, y, z] in LiDAR coordinate // params pts: (npoints, 3) [x, y, z] in LiDAR coordinate
// params pts_feature: (npoints, C) // params pts_feature: (npoints, C)
...@@ -56,22 +69,27 @@ int roiaware_pool3d_gpu(at::Tensor rois, at::Tensor pts, at::Tensor pts_feature, ...@@ -56,22 +69,27 @@ int roiaware_pool3d_gpu(at::Tensor rois, at::Tensor pts, at::Tensor pts_feature,
int out_x = pts_idx_of_voxels.size(1); int out_x = pts_idx_of_voxels.size(1);
int out_y = pts_idx_of_voxels.size(2); int out_y = pts_idx_of_voxels.size(2);
int out_z = pts_idx_of_voxels.size(3); int out_z = pts_idx_of_voxels.size(3);
assert ((out_x < 256) && (out_y < 256) && (out_z < 256)); // we encode index with 8bit assert((out_x < 256) && (out_y < 256) &&
(out_z < 256)); // we encode index with 8bit
const float *rois_data = rois.data<float>(); const float *rois_data = rois.data_ptr<float>();
const float *pts_data = pts.data<float>(); const float *pts_data = pts.data_ptr<float>();
const float *pts_feature_data = pts_feature.data<float>(); const float *pts_feature_data = pts_feature.data_ptr<float>();
int *argmax_data = argmax.data<int>(); int *argmax_data = argmax.data_ptr<int>();
int *pts_idx_of_voxels_data = pts_idx_of_voxels.data<int>(); int *pts_idx_of_voxels_data = pts_idx_of_voxels.data_ptr<int>();
float *pooled_features_data = pooled_features.data<float>(); float *pooled_features_data = pooled_features.data_ptr<float>();
roiaware_pool3d_launcher(boxes_num, pts_num, channels, max_pts_each_voxel, out_x, out_y, out_z, roiaware_pool3d_launcher(
rois_data, pts_data, pts_feature_data, argmax_data, pts_idx_of_voxels_data, pooled_features_data, pool_method); boxes_num, pts_num, channels, max_pts_each_voxel, out_x, out_y, out_z,
rois_data, pts_data, pts_feature_data, argmax_data,
pts_idx_of_voxels_data, pooled_features_data, pool_method);
return 1; return 1;
} }
int roiaware_pool3d_gpu_backward(at::Tensor pts_idx_of_voxels, at::Tensor argmax, at::Tensor grad_out, at::Tensor grad_in, int pool_method){ int roiaware_pool3d_gpu_backward(at::Tensor pts_idx_of_voxels,
at::Tensor argmax, at::Tensor grad_out,
at::Tensor grad_in, int pool_method) {
// params pts_idx_of_voxels: (N, out_x, out_y, out_z, max_pts_each_voxel) // params pts_idx_of_voxels: (N, out_x, out_y, out_z, max_pts_each_voxel)
// params argmax: (N, out_x, out_y, out_z, C) // params argmax: (N, out_x, out_y, out_z, C)
// params grad_out: (N, out_x, out_y, out_z, C) // params grad_out: (N, out_x, out_y, out_z, C)
...@@ -90,20 +108,25 @@ int roiaware_pool3d_gpu_backward(at::Tensor pts_idx_of_voxels, at::Tensor argmax ...@@ -90,20 +108,25 @@ int roiaware_pool3d_gpu_backward(at::Tensor pts_idx_of_voxels, at::Tensor argmax
int max_pts_each_voxel = pts_idx_of_voxels.size(4); // index 0 is the counter int max_pts_each_voxel = pts_idx_of_voxels.size(4); // index 0 is the counter
int channels = grad_out.size(4); int channels = grad_out.size(4);
const int *pts_idx_of_voxels_data = pts_idx_of_voxels.data<int>(); const int *pts_idx_of_voxels_data = pts_idx_of_voxels.data_ptr<int>();
const int *argmax_data = argmax.data<int>(); const int *argmax_data = argmax.data_ptr<int>();
const float *grad_out_data = grad_out.data<float>(); const float *grad_out_data = grad_out.data_ptr<float>();
float *grad_in_data = grad_in.data<float>(); float *grad_in_data = grad_in.data_ptr<float>();
roiaware_pool3d_backward_launcher(boxes_num, out_x, out_y, out_z, channels, max_pts_each_voxel, roiaware_pool3d_backward_launcher(boxes_num, out_x, out_y, out_z, channels,
pts_idx_of_voxels_data, argmax_data, grad_out_data, grad_in_data, pool_method); max_pts_each_voxel, pts_idx_of_voxels_data,
argmax_data, grad_out_data, grad_in_data,
pool_method);
return 1; return 1;
} }
PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
m.def("forward", &roiaware_pool3d_gpu, "roiaware pool3d forward (CUDA)"); m.def("forward", &roiaware_pool3d_gpu, "roiaware pool3d forward (CUDA)");
m.def("backward", &roiaware_pool3d_gpu_backward, "roiaware pool3d backward (CUDA)"); m.def("backward", &roiaware_pool3d_gpu_backward,
m.def("points_in_boxes_gpu", &points_in_boxes_gpu, "points_in_boxes_gpu forward (CUDA)"); "roiaware pool3d backward (CUDA)");
m.def("points_in_boxes_cpu", &points_in_boxes_cpu, "points_in_boxes_cpu forward (CPU)"); m.def("points_in_boxes_gpu", &points_in_boxes_gpu,
"points_in_boxes_gpu forward (CUDA)");
m.def("points_in_boxes_cpu", &points_in_boxes_cpu,
"points_in_boxes_cpu forward (CPU)");
} }
from mmcv.cnn import build_norm_layer
from torch import nn
import mmdet3d.ops.spconv as spconv
from mmdet.models.backbones.resnet import BasicBlock, Bottleneck
def conv3x3(in_planes, out_planes, stride=1, indice_key=None):
    """Construct a 3x3 submanifold sparse convolution with padding 1.

    Args:
        in_planes (int): Number of input channels.
        out_planes (int): Number of output channels.
        stride (int): Convolution stride. Defaults to 1.
        indice_key (str): Indice key used for the sparse tensor.

    Returns:
        spconv.conv.SubMConv3d: The 3x3 submanifold sparse convolution op.
    """
    # TODO: deprecate this helper
    conv_cfg = dict(
        kernel_size=3,
        stride=stride,
        padding=1,
        bias=False,
        indice_key=indice_key)
    return spconv.SubMConv3d(in_planes, out_planes, **conv_cfg)
def conv1x1(in_planes, out_planes, stride=1, indice_key=None):
    """Construct a 1x1 submanifold sparse convolution with padding 1.

    Args:
        in_planes (int): Number of input channels.
        out_planes (int): Number of output channels.
        stride (int): Convolution stride. Defaults to 1.
        indice_key (str): Indice key used for the sparse tensor.

    Returns:
        spconv.conv.SubMConv3d: The 1x1 submanifold sparse convolution op.
    """
    # TODO: deprecate this helper
    # NOTE(review): padding=1 for a 1x1 kernel looks copied from conv3x3.
    # Submanifold convolution keeps output indices equal to input indices,
    # so it is preserved verbatim here -- confirm before changing.
    conv_cfg = dict(
        kernel_size=1,
        stride=stride,
        padding=1,
        bias=False,
        indice_key=indice_key)
    return spconv.SubMConv3d(in_planes, out_planes, **conv_cfg)
class SparseBasicBlockV0(spconv.SparseModule):
    """Sparse basic block for PartA^2 (legacy version).

    Residual basic block built from two 3x3 submanifold sparse convolutions,
    each followed by a norm layer, with a ReLU after the residual add.
    """

    # Channel expansion factor: output channels == planes * expansion.
    expansion = 1

    def __init__(self,
                 inplanes,
                 planes,
                 stride=1,
                 downsample=None,
                 indice_key=None,
                 norm_cfg=None):
        """Build the conv/norm pairs of the block.

        Args:
            inplanes (int): Number of input channels.
            planes (int): Number of output channels.
            stride (int): Stride of the first convolution. Defaults to 1.
            downsample (callable, optional): Applied to the input to produce
                the residual branch when shapes differ. Defaults to None.
            indice_key (str, optional): Indice key shared by the sparse convs.
            norm_cfg (dict): Config passed to ``build_norm_layer``.
                NOTE(review): the ``None`` default would fail inside
                ``build_norm_layer``; effectively required -- confirm callers.
        """
        # TODO: deprecate this class
        super().__init__()
        self.conv1 = conv3x3(inplanes, planes, stride, indice_key=indice_key)
        # build_norm_layer returns (name, layer); only the layer is kept.
        self.bn1 = build_norm_layer(norm_cfg, planes)[1]
        self.relu = nn.ReLU()
        self.conv2 = conv3x3(planes, planes, indice_key=indice_key)
        self.bn2 = build_norm_layer(norm_cfg, planes)[1]
        self.downsample = downsample
        self.stride = stride

    def forward(self, x):
        """Apply the residual block to sparse tensor ``x``."""
        identity = x.features
        assert x.features.dim() == 2, f'x.features.dim()={x.features.dim()}'
        out = self.conv1(x)
        out.features = self.relu(self.bn1(out.features))
        out = self.conv2(out)
        out.features = self.bn2(out.features)
        if self.downsample is not None:
            identity = self.downsample(x)
        out.features += identity
        out.features = self.relu(out.features)
        return out
class SparseBottleneckV0(spconv.SparseModule):
    """Sparse bottleneck block for PartA^2 (legacy version).

    1x1 -> 3x3 -> 1x1 submanifold sparse convolution bottleneck with a
    residual connection and ReLU activations.
    """

    # Channel expansion factor: output channels == planes * expansion.
    expansion = 4

    def __init__(self,
                 inplanes,
                 planes,
                 stride=1,
                 downsample=None,
                 indice_key=None,
                 norm_fn=None):
        """Build the three conv/norm pairs of the bottleneck.

        Args:
            inplanes (int): Number of input channels.
            planes (int): Number of intermediate channels; the block outputs
                ``planes * expansion`` channels.
            stride (int): Stride of the middle 3x3 convolution. Defaults to 1.
            downsample (callable, optional): Applied to the input to produce
                the residual branch when shapes differ. Defaults to None.
            indice_key (str, optional): Indice key shared by the sparse convs.
            norm_fn (callable): Factory mapping a channel count to a norm
                layer. NOTE(review): the ``None`` default would crash when
                called; effectively required -- confirm callers.
        """
        # TODO: deprecate this class
        super().__init__()
        self.conv1 = conv1x1(inplanes, planes, indice_key=indice_key)
        self.bn1 = norm_fn(planes)
        self.conv2 = conv3x3(planes, planes, stride, indice_key=indice_key)
        self.bn2 = norm_fn(planes)
        self.conv3 = conv1x1(
            planes, planes * self.expansion, indice_key=indice_key)
        self.bn3 = norm_fn(planes * self.expansion)
        self.relu = nn.ReLU()
        self.downsample = downsample
        self.stride = stride

    def forward(self, x):
        """Apply the bottleneck block to sparse tensor ``x``."""
        identity = x.features
        out = self.conv1(x)
        out.features = self.relu(self.bn1(out.features))
        out = self.conv2(out)
        out.features = self.relu(self.bn2(out.features))
        out = self.conv3(out)
        out.features = self.bn3(out.features)
        if self.downsample is not None:
            identity = self.downsample(x)
        out.features += identity
        out.features = self.relu(out.features)
        return out
class SparseBottleneck(Bottleneck, spconv.SparseModule):
    """Sparse bottleneck block for PartA^2.

    Reuses mmdet's ``Bottleneck`` layer construction, but runs the layers on
    the ``features`` tensor of a sparse tensor in ``forward``.
    """

    # Channel expansion factor inherited from the bottleneck design.
    expansion = 4

    def __init__(self,
                 inplanes,
                 planes,
                 stride=1,
                 downsample=None,
                 conv_cfg=None,
                 norm_cfg=None):
        """Initialize both parents; layer creation is done by ``Bottleneck``.

        Args:
            inplanes (int): Number of input channels.
            planes (int): Number of intermediate channels.
            stride (int): Convolution stride. Defaults to 1.
            downsample (callable, optional): Residual-branch transform.
            conv_cfg (dict, optional): Conv layer config for ``Bottleneck``.
            norm_cfg (dict, optional): Norm layer config for ``Bottleneck``.
        """
        # Both bases must be initialized explicitly: SparseModule sets up
        # sparse bookkeeping, Bottleneck builds conv1..conv3/bn1..bn3.
        spconv.SparseModule.__init__(self)
        Bottleneck.__init__(
            self,
            inplanes,
            planes,
            stride=stride,
            downsample=downsample,
            conv_cfg=conv_cfg,
            norm_cfg=norm_cfg)

    def forward(self, x):
        """Apply the bottleneck block to sparse tensor ``x``."""
        identity = x.features
        out = self.conv1(x)
        out.features = self.relu(self.bn1(out.features))
        out = self.conv2(out)
        out.features = self.relu(self.bn2(out.features))
        out = self.conv3(out)
        out.features = self.bn3(out.features)
        if self.downsample is not None:
            identity = self.downsample(x)
        out.features += identity
        out.features = self.relu(out.features)
        return out
class SparseBasicBlock(BasicBlock, spconv.SparseModule):
    """Sparse basic block for PartA^2.

    Reuses mmdet's ``BasicBlock`` layer construction, but runs the layers on
    the ``features`` tensor of a sparse tensor in ``forward``.
    """

    # Channel expansion factor inherited from the basic-block design.
    expansion = 1

    def __init__(self,
                 inplanes,
                 planes,
                 stride=1,
                 downsample=None,
                 conv_cfg=None,
                 norm_cfg=None):
        """Initialize both parents; layer creation is done by ``BasicBlock``.

        Args:
            inplanes (int): Number of input channels.
            planes (int): Number of output channels.
            stride (int): Convolution stride. Defaults to 1.
            downsample (callable, optional): Residual-branch transform.
            conv_cfg (dict, optional): Conv layer config for ``BasicBlock``.
            norm_cfg (dict, optional): Norm layer config for ``BasicBlock``.
        """
        # Both bases must be initialized explicitly: SparseModule sets up
        # sparse bookkeeping, BasicBlock builds conv1/conv2 and norm1/norm2.
        spconv.SparseModule.__init__(self)
        BasicBlock.__init__(
            self,
            inplanes,
            planes,
            stride=stride,
            downsample=downsample,
            conv_cfg=conv_cfg,
            norm_cfg=norm_cfg)

    def forward(self, x):
        """Apply the residual block to sparse tensor ``x``."""
        identity = x.features
        assert x.features.dim() == 2, f'x.features.dim()={x.features.dim()}'
        out = self.conv1(x)
        out.features = self.relu(self.norm1(out.features))
        out = self.conv2(out)
        out.features = self.norm2(out.features)
        if self.downsample is not None:
            identity = self.downsample(x)
        out.features += identity
        out.features = self.relu(out.features)
        return out
...@@ -16,6 +16,7 @@ import math ...@@ -16,6 +16,7 @@ import math
import numpy as np import numpy as np
import torch import torch
from mmcv.cnn import CONV_LAYERS
from torch.nn import init from torch.nn import init
from torch.nn.parameter import Parameter from torch.nn.parameter import Parameter
...@@ -205,6 +206,7 @@ class SparseConvolution(SparseModule): ...@@ -205,6 +206,7 @@ class SparseConvolution(SparseModule):
return out_tensor return out_tensor
@CONV_LAYERS.register_module()
class SparseConv2d(SparseConvolution): class SparseConv2d(SparseConvolution):
def __init__(self, def __init__(self,
...@@ -230,6 +232,7 @@ class SparseConv2d(SparseConvolution): ...@@ -230,6 +232,7 @@ class SparseConv2d(SparseConvolution):
indice_key=indice_key) indice_key=indice_key)
@CONV_LAYERS.register_module()
class SparseConv3d(SparseConvolution): class SparseConv3d(SparseConvolution):
def __init__(self, def __init__(self,
...@@ -255,6 +258,7 @@ class SparseConv3d(SparseConvolution): ...@@ -255,6 +258,7 @@ class SparseConv3d(SparseConvolution):
indice_key=indice_key) indice_key=indice_key)
@CONV_LAYERS.register_module()
class SparseConv4d(SparseConvolution): class SparseConv4d(SparseConvolution):
def __init__(self, def __init__(self,
...@@ -280,6 +284,7 @@ class SparseConv4d(SparseConvolution): ...@@ -280,6 +284,7 @@ class SparseConv4d(SparseConvolution):
indice_key=indice_key) indice_key=indice_key)
@CONV_LAYERS.register_module()
class SparseConvTranspose2d(SparseConvolution): class SparseConvTranspose2d(SparseConvolution):
def __init__(self, def __init__(self,
...@@ -306,6 +311,7 @@ class SparseConvTranspose2d(SparseConvolution): ...@@ -306,6 +311,7 @@ class SparseConvTranspose2d(SparseConvolution):
indice_key=indice_key) indice_key=indice_key)
@CONV_LAYERS.register_module()
class SparseConvTranspose3d(SparseConvolution): class SparseConvTranspose3d(SparseConvolution):
def __init__(self, def __init__(self,
...@@ -332,6 +338,7 @@ class SparseConvTranspose3d(SparseConvolution): ...@@ -332,6 +338,7 @@ class SparseConvTranspose3d(SparseConvolution):
indice_key=indice_key) indice_key=indice_key)
@CONV_LAYERS.register_module()
class SparseInverseConv2d(SparseConvolution): class SparseInverseConv2d(SparseConvolution):
def __init__(self, def __init__(self,
...@@ -350,6 +357,7 @@ class SparseInverseConv2d(SparseConvolution): ...@@ -350,6 +357,7 @@ class SparseInverseConv2d(SparseConvolution):
indice_key=indice_key) indice_key=indice_key)
@CONV_LAYERS.register_module()
class SparseInverseConv3d(SparseConvolution): class SparseInverseConv3d(SparseConvolution):
def __init__(self, def __init__(self,
...@@ -368,6 +376,7 @@ class SparseInverseConv3d(SparseConvolution): ...@@ -368,6 +376,7 @@ class SparseInverseConv3d(SparseConvolution):
indice_key=indice_key) indice_key=indice_key)
@CONV_LAYERS.register_module()
class SubMConv2d(SparseConvolution): class SubMConv2d(SparseConvolution):
def __init__(self, def __init__(self,
...@@ -394,6 +403,7 @@ class SubMConv2d(SparseConvolution): ...@@ -394,6 +403,7 @@ class SubMConv2d(SparseConvolution):
indice_key=indice_key) indice_key=indice_key)
@CONV_LAYERS.register_module()
class SubMConv3d(SparseConvolution): class SubMConv3d(SparseConvolution):
def __init__(self, def __init__(self,
...@@ -420,6 +430,7 @@ class SubMConv3d(SparseConvolution): ...@@ -420,6 +430,7 @@ class SubMConv3d(SparseConvolution):
indice_key=indice_key) indice_key=indice_key)
@CONV_LAYERS.register_module()
class SubMConv4d(SparseConvolution): class SubMConv4d(SparseConvolution):
def __init__(self, def __init__(self,
......
...@@ -34,7 +34,7 @@ torch::Tensor indiceMaxPool(torch::Tensor features, torch::Tensor indicePairs, ...@@ -34,7 +34,7 @@ torch::Tensor indiceMaxPool(torch::Tensor features, torch::Tensor indicePairs,
torch::Tensor output = torch::zeros({numAct, numInPlanes}, options); torch::Tensor output = torch::zeros({numAct, numInPlanes}, options);
double totalTime = 0; double totalTime = 0;
for (int i = 0; i < kernelVolume; ++i) { for (int i = 0; i < kernelVolume; ++i) {
auto nHot = indicePairNumCpu.data<int>()[i]; auto nHot = indicePairNumCpu.data_ptr<int>()[i];
if (nHot <= 0) { if (nHot <= 0) {
continue; continue;
} }
...@@ -60,7 +60,8 @@ torch::Tensor indiceMaxPool(torch::Tensor features, torch::Tensor indicePairs, ...@@ -60,7 +60,8 @@ torch::Tensor indiceMaxPool(torch::Tensor features, torch::Tensor indicePairs,
template <typename T> template <typename T>
torch::Tensor indiceMaxPoolBackward(torch::Tensor features, torch::Tensor indiceMaxPoolBackward(torch::Tensor features,
torch::Tensor outFeatures, torch::Tensor outFeatures,
torch::Tensor outGrad, torch::Tensor indicePairs, torch::Tensor outGrad,
torch::Tensor indicePairs,
torch::Tensor indiceNum) { torch::Tensor indiceNum) {
auto device = features.device().type(); auto device = features.device().type();
auto numInPlanes = features.size(1); auto numInPlanes = features.size(1);
...@@ -70,7 +71,7 @@ torch::Tensor indiceMaxPoolBackward(torch::Tensor features, ...@@ -70,7 +71,7 @@ torch::Tensor indiceMaxPoolBackward(torch::Tensor features,
torch::Tensor inputGrad = torch::zeros(features.sizes(), options); torch::Tensor inputGrad = torch::zeros(features.sizes(), options);
auto kernelVolume = indicePairs.size(0); auto kernelVolume = indicePairs.size(0);
for (int i = 0; i < kernelVolume; ++i) { for (int i = 0; i < kernelVolume; ++i) {
auto nHot = indicePairNumCpu.data<int>()[i]; auto nHot = indicePairNumCpu.data_ptr<int>()[i];
if (nHot <= 0) { if (nHot <= 0) {
continue; continue;
} }
......
...@@ -13,20 +13,21 @@ ...@@ -13,20 +13,21 @@
// limitations under the License. // limitations under the License.
#pragma once #pragma once
#include <tensorview/tensorview.h>
#include <torch/script.h>
#include <ATen/ATen.h> #include <ATen/ATen.h>
#include <ATen/cuda/CUDAContext.h> #include <ATen/cuda/CUDAContext.h>
#include <tensorview/tensorview.h>
#include <torch/script.h>
namespace tv { namespace tv {
struct TorchGPU: public tv::GPU { struct TorchGPU : public tv::GPU {
virtual cudaStream_t getStream() const override { virtual cudaStream_t getStream() const override {
return at::cuda::getCurrentCUDAStream(); return at::cuda::getCurrentCUDAStream();
} }
}; };
template <typename T> void check_torch_dtype(const torch::Tensor &tensor) { template <typename T>
void check_torch_dtype(const torch::Tensor &tensor) {
switch (tensor.type().scalarType()) { switch (tensor.type().scalarType()) {
case at::ScalarType::Double: { case at::ScalarType::Double: {
auto val = std::is_same<std::remove_const_t<T>, double>::value; auto val = std::is_same<std::remove_const_t<T>, double>::value;
...@@ -65,6 +66,6 @@ tv::TensorView<T> torch2tv(const torch::Tensor &tensor) { ...@@ -65,6 +66,6 @@ tv::TensorView<T> torch2tv(const torch::Tensor &tensor) {
for (auto i : tensor.sizes()) { for (auto i : tensor.sizes()) {
shape.push_back(i); shape.push_back(i);
} }
return tv::TensorView<T>(tensor.data<std::remove_const_t<T>>(), shape); return tv::TensorView<T>(tensor.data_ptr<std::remove_const_t<T>>(), shape);
} }
} // namespace tv } // namespace tv
...@@ -6,7 +6,7 @@ ...@@ -6,7 +6,7 @@
#include <ATen/cuda/CUDAApplyUtils.cuh> #include <ATen/cuda/CUDAApplyUtils.cuh>
#define CHECK_CUDA(x) \ #define CHECK_CUDA(x) \
TORCH_CHECK(x.type().is_cuda(), #x " must be a CUDA tensor") TORCH_CHECK(x.device().is_cuda(), #x " must be a CUDA tensor")
#define CHECK_CONTIGUOUS(x) \ #define CHECK_CONTIGUOUS(x) \
TORCH_CHECK(x.is_contiguous(), #x " must be contiguous") TORCH_CHECK(x.is_contiguous(), #x " must be contiguous")
#define CHECK_INPUT(x) \ #define CHECK_INPUT(x) \
...@@ -177,7 +177,7 @@ std::vector<at::Tensor> dynamic_point_to_voxel_forward_gpu( ...@@ -177,7 +177,7 @@ std::vector<at::Tensor> dynamic_point_to_voxel_forward_gpu(
dim3 threads(threadsPerBlock); dim3 threads(threadsPerBlock);
cudaStream_t map_stream = at::cuda::getCurrentCUDAStream(); cudaStream_t map_stream = at::cuda::getCurrentCUDAStream();
AT_DISPATCH_ALL_TYPES( AT_DISPATCH_ALL_TYPES(
voxel_mapping.type(), "determin_duplicate", ([&] { voxel_mapping.scalar_type(), "determin_duplicate", ([&] {
point_to_voxelidx_kernel<int><<<blocks, threads, 0, map_stream>>>( point_to_voxelidx_kernel<int><<<blocks, threads, 0, map_stream>>>(
voxel_mapping.data_ptr<int>(), point_to_voxelidx.data_ptr<int>(), voxel_mapping.data_ptr<int>(), point_to_voxelidx.data_ptr<int>(),
point_to_pointidx.data_ptr<int>(), num_points, NDim); point_to_pointidx.data_ptr<int>(), num_points, NDim);
...@@ -203,7 +203,7 @@ std::vector<at::Tensor> dynamic_point_to_voxel_forward_gpu( ...@@ -203,7 +203,7 @@ std::vector<at::Tensor> dynamic_point_to_voxel_forward_gpu(
voxel_mapping.options()); // must be zero from the begining voxel_mapping.options()); // must be zero from the begining
cudaStream_t logic_stream = at::cuda::getCurrentCUDAStream(); cudaStream_t logic_stream = at::cuda::getCurrentCUDAStream();
AT_DISPATCH_ALL_TYPES( AT_DISPATCH_ALL_TYPES(
voxel_mapping.type(), "determin_duplicate", ([&] { voxel_mapping.scalar_type(), "determin_duplicate", ([&] {
determin_voxel_num<int><<<1, 1, 0, logic_stream>>>( determin_voxel_num<int><<<1, 1, 0, logic_stream>>>(
voxel_mapping.data_ptr<int>(), num_points_per_voxel.data_ptr<int>(), voxel_mapping.data_ptr<int>(), num_points_per_voxel.data_ptr<int>(),
point_to_voxelidx.data_ptr<int>(), point_to_voxelidx.data_ptr<int>(),
...@@ -228,7 +228,7 @@ std::vector<at::Tensor> dynamic_point_to_voxel_forward_gpu( ...@@ -228,7 +228,7 @@ std::vector<at::Tensor> dynamic_point_to_voxel_forward_gpu(
dim3 cp_threads(threadsPerBlock, 4); dim3 cp_threads(threadsPerBlock, 4);
cudaStream_t cp_stream = at::cuda::getCurrentCUDAStream(); cudaStream_t cp_stream = at::cuda::getCurrentCUDAStream();
AT_DISPATCH_ALL_TYPES( AT_DISPATCH_ALL_TYPES(
points.type(), "scatter_point_to_voxel", ([&] { points.scalar_type(), "scatter_point_to_voxel", ([&] {
scatter_point_to_voxel_kernel<float, int> scatter_point_to_voxel_kernel<float, int>
<<<blocks, cp_threads, 0, cp_stream>>>( <<<blocks, cp_threads, 0, cp_stream>>>(
points.data_ptr<float>(), voxel_mapping.data_ptr<int>(), points.data_ptr<float>(), voxel_mapping.data_ptr<int>(),
...@@ -265,8 +265,8 @@ void dynamic_point_to_voxel_backward_gpu(at::Tensor& grad_input_points, ...@@ -265,8 +265,8 @@ void dynamic_point_to_voxel_backward_gpu(at::Tensor& grad_input_points,
dim3 blocks(col_blocks); dim3 blocks(col_blocks);
dim3 cp_threads(threadsPerBlock, 4); dim3 cp_threads(threadsPerBlock, 4);
cudaStream_t cp_stream = at::cuda::getCurrentCUDAStream(); cudaStream_t cp_stream = at::cuda::getCurrentCUDAStream();
AT_DISPATCH_ALL_TYPES(grad_input_points.type(), "scatter_point_to_voxel", AT_DISPATCH_ALL_TYPES(grad_input_points.scalar_type(),
([&] { "scatter_point_to_voxel", ([&] {
map_voxel_to_point_kernel<float, int> map_voxel_to_point_kernel<float, int>
<<<blocks, cp_threads, 0, cp_stream>>>( <<<blocks, cp_threads, 0, cp_stream>>>(
grad_input_points.data_ptr<float>(), grad_input_points.data_ptr<float>(),
......
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment