Commit e0d892c7 authored by liyinhao

Merge branch 'master_temp' into indoor_loading

# Conflicts:
#	tools/data_converter/sunrgbd_data_utils.py
parents 929ebfe8 f584b970
// Modified from
// https://github.com/sshaoshuai/PCDet/blob/master/pcdet/ops/roiaware_pool3d/src/roiaware_pool3d_kernel.cu
// Points in boxes gpu
// Written by Shaoshuai Shi
// All Rights Reserved 2019.
#include <assert.h>
#include <math.h>
#include <stdio.h>
#include <torch/extension.h>
#include <torch/serialize/tensor.h>
#define THREADS_PER_BLOCK 256
#define DIVUP(m, n) ((m) / (n) + ((m) % (n) > 0))
#define CHECK_CUDA(x) \
TORCH_CHECK(x.device().is_cuda(), #x, " must be a CUDAtensor ")
#define CHECK_CONTIGUOUS(x) \
TORCH_CHECK(x.is_contiguous(), #x, " must be contiguous ")
#define CHECK_INPUT(x) \
CHECK_CUDA(x); \
CHECK_CONTIGUOUS(x)
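As a worked example of the launch arithmetic: DIVUP is a ceiling division, so with pts_num = 1000 and THREADS_PER_BLOCK = 256 it gives 1000/256 + (1000 % 256 > 0) = 3 + 1 = 4 blocks per batch sample; the surplus threads of the last block are presumably masked by the pt_idx bounds check inside the collapsed portion of the kernel body below.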
// #define DEBUG
__device__ inline void lidar_to_local_coords(float shift_x, float shift_y,
float rz, float &local_x,
float &local_y) {
// should rotate pi/2 + alpha to translate LiDAR to local
float rot_angle = rz + M_PI / 2;
float cosa = cos(rot_angle), sina = sin(rot_angle);
@@ -29,10 +32,11 @@ __device__ inline void lidar_to_local_coords(float shift_x, float shift_y, float
local_y = shift_x * sina + shift_y * cosa;
}
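Note the quarter turn: the offset (shift_x, shift_y) is rotated by rz + π/2 rather than rz alone, matching the "rotate pi/2 + alpha" comment above. After this transform, check_pt_in_box3d below compares local_x against the half-length l/2 and local_y against the half-width w/2, so the extra π/2 absorbs the (w, l) axis ordering of the box encoding.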
__device__ inline int check_pt_in_box3d(const float *pt, const float *box3d,
float &local_x, float &local_y) {
// param pt: (x, y, z)
// param box3d: (cx, cy, cz, w, l, h, rz) in LiDAR coordinate, cz in the
// bottom center
float x = pt[0], y = pt[1], z = pt[2];
float cx = box3d[0], cy = box3d[1], cz = box3d[2];
float w = box3d[3], l = box3d[4], h = box3d[5], rz = box3d[6];
@@ -40,16 +44,19 @@ __device__ inline int check_pt_in_box3d(const float *pt, const float *box3d, flo
if (fabsf(z - cz) > h / 2.0) return 0;
lidar_to_local_coords(x - cx, y - cy, rz, local_x, local_y);
float in_flag = (local_x > -l / 2.0) & (local_x < l / 2.0) &
(local_y > -w / 2.0) & (local_y < w / 2.0);
return in_flag;
}
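For quick CPU-side sanity checks, the same membership test can be mirrored in NumPy. This is only a sketch, not part of the extension: it assumes the collapsed hunk above shifts cz from the bottom center to the geometric center (cz += h / 2) before the height test, and it reconstructs the collapsed local_x line as the standard rotation counterpart of the visible local_y line.

import numpy as np

def check_pt_in_box3d_np(pt, box3d):
    x, y, z = pt
    cx, cy, cz, w, l, h, rz = box3d
    cz += h / 2.0  # assumed hidden step: bottom center -> geometric center
    if abs(z - cz) > h / 2.0:
        return False
    rot = rz + np.pi / 2  # same pi/2 + alpha rotation as lidar_to_local_coords
    cosa, sina = np.cos(rot), np.sin(rot)
    local_x = (x - cx) * cosa - (y - cy) * sina  # reconstructed hidden line
    local_y = (x - cx) * sina + (y - cy) * cosa  # matches the visible line
    return (-l / 2.0 < local_x < l / 2.0) and (-w / 2.0 < local_y < w / 2.0)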
__global__ void points_in_boxes_kernel(int batch_size, int boxes_num,
int pts_num, const float *boxes,
const float *pts,
int *box_idx_of_points) {
// params boxes: (B, N, 7) [x, y, z, w, l, h, rz] in LiDAR coordinate, z is
// the bottom center, boxes DO NOT overlap
// params pts: (B, npoints, 3) [x, y, z] in LiDAR coordinate
// params box_idx_of_points: (B, npoints), default -1
int bs_idx = blockIdx.y;
int pt_idx = blockIdx.x * blockDim.x + threadIdx.x;
@@ -61,26 +68,28 @@ __global__ void points_in_boxes_kernel(int batch_size, int boxes_num, int pts_nu
float local_x = 0, local_y = 0;
int cur_in_flag = 0;
for (int k = 0; k < boxes_num; k++) {
cur_in_flag = check_pt_in_box3d(pts, boxes + k * 7, local_x, local_y);
if (cur_in_flag) {
box_idx_of_points[0] = k;
break;
}
}
}
void points_in_boxes_launcher(int batch_size, int boxes_num, int pts_num,
const float *boxes, const float *pts,
int *box_idx_of_points) {
// params boxes: (B, N, 7) [x, y, z, w, l, h, rz] in LiDAR coordinate, z is
// the bottom center, boxes DO NOT overlap
// params pts: (B, npoints, 3) [x, y, z] in LiDAR coordinate
// params box_idx_of_points: (B, npoints), default -1
cudaError_t err;
dim3 blocks(DIVUP(pts_num, THREADS_PER_BLOCK), batch_size);
dim3 threads(THREADS_PER_BLOCK);
points_in_boxes_kernel<<<blocks, threads>>>(batch_size, boxes_num, pts_num,
boxes, pts, box_idx_of_points);
err = cudaGetLastError();
if (cudaSuccess != err) {
@@ -93,10 +102,12 @@ void points_in_boxes_launcher(int batch_size, int boxes_num, int pts_num, const
#endif
}
int points_in_boxes_gpu(at::Tensor boxes_tensor, at::Tensor pts_tensor,
at::Tensor box_idx_of_points_tensor) {
// params boxes: (B, N, 7) [x, y, z, w, l, h, rz] in LiDAR coordinate, z is
// the bottom center, boxes DO NOT overlap
// params pts: (B, npoints, 3) [x, y, z] in LiDAR coordinate
// params box_idx_of_points: (B, npoints), default -1
CHECK_INPUT(boxes_tensor);
CHECK_INPUT(pts_tensor);
@@ -106,11 +117,12 @@ int points_in_boxes_gpu(at::Tensor boxes_tensor, at::Tensor pts_tensor, at::Tens
int boxes_num = boxes_tensor.size(1);
int pts_num = pts_tensor.size(1);
const float *boxes = boxes_tensor.data_ptr<float>();
const float *pts = pts_tensor.data_ptr<float>();
int *box_idx_of_points = box_idx_of_points_tensor.data_ptr<int>();
points_in_boxes_launcher(batch_size, boxes_num, pts_num, boxes, pts,
box_idx_of_points);
return 1;
}
// Modified from
// https://github.com/sshaoshuai/PCDet/blob/master/pcdet/ops/roiaware_pool3d/src/roiaware_pool3d_kernel.cu
// RoI-aware point cloud feature pooling
// Written by Shaoshuai Shi
// All Rights Reserved 2019.
#include <assert.h>
#include <torch/extension.h>
#include <torch/serialize/tensor.h>
#define CHECK_CUDA(x) \
TORCH_CHECK(x.device().is_cuda(), #x, " must be a CUDAtensor ")
#define CHECK_CONTIGUOUS(x) \
TORCH_CHECK(x.is_contiguous(), #x, " must be contiguous ")
#define CHECK_INPUT(x) \
CHECK_CUDA(x); \
CHECK_CONTIGUOUS(x)
void roiaware_pool3d_launcher(int boxes_num, int pts_num, int channels,
int max_pts_each_voxel, int out_x, int out_y,
int out_z, const float *rois, const float *pts,
const float *pts_feature, int *argmax,
int *pts_idx_of_voxels, float *pooled_features,
int pool_method);
void roiaware_pool3d_backward_launcher(int boxes_num, int out_x, int out_y,
int out_z, int channels,
int max_pts_each_voxel,
const int *pts_idx_of_voxels,
const int *argmax, const float *grad_out,
float *grad_in, int pool_method);
int roiaware_pool3d_gpu(at::Tensor rois, at::Tensor pts, at::Tensor pts_feature,
at::Tensor argmax, at::Tensor pts_idx_of_voxels,
at::Tensor pooled_features, int pool_method);
int roiaware_pool3d_gpu_backward(at::Tensor pts_idx_of_voxels,
at::Tensor argmax, at::Tensor grad_out,
at::Tensor grad_in, int pool_method);
int points_in_boxes_cpu(at::Tensor boxes_tensor, at::Tensor pts_tensor,
at::Tensor pts_indices_tensor);
int points_in_boxes_gpu(at::Tensor boxes_tensor, at::Tensor pts_tensor,
at::Tensor box_idx_of_points_tensor);
int roiaware_pool3d_gpu(at::Tensor rois, at::Tensor pts, at::Tensor pts_feature,
at::Tensor argmax, at::Tensor pts_idx_of_voxels,
at::Tensor pooled_features, int pool_method) {
// params rois: (N, 7) [x, y, z, w, l, h, ry] in LiDAR coordinate
// params pts: (npoints, 3) [x, y, z] in LiDAR coordinate
// params pts_feature: (npoints, C)
@@ -56,22 +69,27 @@ int roiaware_pool3d_gpu(at::Tensor rois, at::Tensor pts, at::Tensor pts_feature,
int out_x = pts_idx_of_voxels.size(1);
int out_y = pts_idx_of_voxels.size(2);
int out_z = pts_idx_of_voxels.size(3);
assert((out_x < 256) && (out_y < 256) &&
(out_z < 256)); // we encode index with 8bit
const float *rois_data = rois.data_ptr<float>();
const float *pts_data = pts.data_ptr<float>();
const float *pts_feature_data = pts_feature.data_ptr<float>();
int *argmax_data = argmax.data_ptr<int>();
int *pts_idx_of_voxels_data = pts_idx_of_voxels.data_ptr<int>();
float *pooled_features_data = pooled_features.data_ptr<float>();
roiaware_pool3d_launcher(
boxes_num, pts_num, channels, max_pts_each_voxel, out_x, out_y, out_z,
rois_data, pts_data, pts_feature_data, argmax_data,
pts_idx_of_voxels_data, pooled_features_data, pool_method);
return 1;
}
int roiaware_pool3d_gpu_backward(at::Tensor pts_idx_of_voxels,
at::Tensor argmax, at::Tensor grad_out,
at::Tensor grad_in, int pool_method) {
// params pts_idx_of_voxels: (N, out_x, out_y, out_z, max_pts_each_voxel)
// params argmax: (N, out_x, out_y, out_z, C)
// params grad_out: (N, out_x, out_y, out_z, C)
@@ -90,20 +108,25 @@ int roiaware_pool3d_gpu_backward(at::Tensor pts_idx_of_voxels, at::Tensor argmax
int max_pts_each_voxel = pts_idx_of_voxels.size(4); // index 0 is the counter
int channels = grad_out.size(4);
const int *pts_idx_of_voxels_data = pts_idx_of_voxels.data_ptr<int>();
const int *argmax_data = argmax.data_ptr<int>();
const float *grad_out_data = grad_out.data_ptr<float>();
float *grad_in_data = grad_in.data_ptr<float>();
roiaware_pool3d_backward_launcher(boxes_num, out_x, out_y, out_z, channels,
max_pts_each_voxel, pts_idx_of_voxels_data,
argmax_data, grad_out_data, grad_in_data,
pool_method);
return 1;
}
PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
m.def("forward", &roiaware_pool3d_gpu, "roiaware pool3d forward (CUDA)");
m.def("backward", &roiaware_pool3d_gpu_backward, "roiaware pool3d backward (CUDA)");
m.def("points_in_boxes_gpu", &points_in_boxes_gpu, "points_in_boxes_gpu forward (CUDA)");
m.def("points_in_boxes_cpu", &points_in_boxes_cpu, "points_in_boxes_cpu forward (CPU)");
m.def("backward", &roiaware_pool3d_gpu_backward,
"roiaware pool3d backward (CUDA)");
m.def("points_in_boxes_gpu", &points_in_boxes_gpu,
"points_in_boxes_gpu forward (CUDA)");
m.def("points_in_boxes_cpu", &points_in_boxes_cpu,
"points_in_boxes_cpu forward (CPU)");
}
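A hedged usage sketch of these bindings. The compiled module name (roiaware_pool3d_ext) and its import path are assumptions for illustration only; in practice the ops are reached through the mmdet3d.ops wrappers.

import torch
from mmdet3d.ops.roiaware_pool3d import roiaware_pool3d_ext  # hypothetical name

boxes = torch.rand(2, 16, 7).cuda()  # (B, N, 7) [x, y, z, w, l, h, rz]
pts = torch.rand(2, 2048, 3).cuda()  # (B, npoints, 3) in LiDAR coordinates
box_idx = pts.new_full((2, 2048), -1, dtype=torch.int32)  # default -1

roiaware_pool3d_ext.points_in_boxes_gpu(boxes, pts, box_idx)
# box_idx[b, i] now holds the index of the first box containing point i,
# or -1 if the point lies outside every box.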
from mmcv.cnn import build_norm_layer
from torch import nn
import mmdet3d.ops.spconv as spconv
from mmdet.models.backbones.resnet import BasicBlock, Bottleneck
def conv3x3(in_planes, out_planes, stride=1, indice_key=None):
"""3x3 submanifold sparse convolution with padding.
Args:
in_planes (int): the number of input channels
out_planes (int): the number of output channels
stride (int): the stride of convolution
indice_key (str): the indice key used for sparse tensor
Returns:
spconv.conv.SubMConv3d: 3x3 submanifold sparse convolution ops
"""
# TODO: deprecate this class
return spconv.SubMConv3d(
in_planes,
out_planes,
kernel_size=3,
stride=stride,
padding=1,
bias=False,
indice_key=indice_key)
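For illustration, a submanifold conv built this way shares its sparsity pattern with any other conv created under the same indice key (assuming a working spconv installation):

conv = conv3x3(16, 32, indice_key='subm1')  # reuses indices tagged 'subm1'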
def conv1x1(in_planes, out_planes, stride=1, indice_key=None):
"""1x1 submanifold sparse convolution with padding.
Args:
in_planes (int): the number of input channels
out_planes (int): the number of output channels
stride (int): the stride of convolution
indice_key (str): the indice key used for sparse tensor
Returns:
spconv.conv.SubMConv3d: 1x1 submanifold sparse convolution ops
"""
# TODO: deprecate this class
return spconv.SubMConv3d(
in_planes,
out_planes,
kernel_size=1,
stride=stride,
padding=1,
bias=False,
indice_key=indice_key)
class SparseBasicBlockV0(spconv.SparseModule):
expansion = 1
def __init__(self,
inplanes,
planes,
stride=1,
downsample=None,
indice_key=None,
norm_cfg=None):
"""Sparse basic block for PartA^2.
Sparse basic block implemented with submanifold sparse convolution.
"""
# TODO: deprecate this class
super().__init__()
self.conv1 = conv3x3(inplanes, planes, stride, indice_key=indice_key)
norm_name1, norm_layer1 = build_norm_layer(norm_cfg, planes)
self.bn1 = norm_layer1
self.relu = nn.ReLU()
self.conv2 = conv3x3(planes, planes, indice_key=indice_key)
norm_name2, norm_layer2 = build_norm_layer(norm_cfg, planes)
self.bn2 = norm_layer2
self.downsample = downsample
self.stride = stride
def forward(self, x):
identity = x.features
assert x.features.dim() == 2, f'x.features.dim()={x.features.dim()}'
out = self.conv1(x)
out.features = self.bn1(out.features)
out.features = self.relu(out.features)
out = self.conv2(out)
out.features = self.bn2(out.features)
if self.downsample is not None:
identity = self.downsample(x)
out.features += identity
out.features = self.relu(out.features)
return out
class SparseBottleneckV0(spconv.SparseModule):
expansion = 4
def __init__(self,
inplanes,
planes,
stride=1,
downsample=None,
indice_key=None,
norm_fn=None):
"""Sparse bottleneck block for PartA^2.
Bottleneck block implemented with submanifold sparse convolution.
"""
# TODO: deprecate this class
super().__init__()
self.conv1 = conv1x1(inplanes, planes, indice_key=indice_key)
self.bn1 = norm_fn(planes)
self.conv2 = conv3x3(planes, planes, stride, indice_key=indice_key)
self.bn2 = norm_fn(planes)
self.conv3 = conv1x1(
planes, planes * self.expansion, indice_key=indice_key)
self.bn3 = norm_fn(planes * self.expansion)
self.relu = nn.ReLU()
self.downsample = downsample
self.stride = stride
def forward(self, x):
identity = x.features
out = self.conv1(x)
out.features = self.bn1(out.features)
out.features = self.relu(out.features)
out = self.conv2(out)
out.features = self.bn2(out.features)
out.features = self.relu(out.features)
out = self.conv3(out)
out.features = self.bn3(out.features)
if self.downsample is not None:
identity = self.downsample(x)
out.features += identity
out.features = self.relu(out.features)
return out
class SparseBottleneck(Bottleneck, spconv.SparseModule):
expansion = 4
def __init__(self,
inplanes,
planes,
stride=1,
downsample=None,
conv_cfg=None,
norm_cfg=None):
"""Sparse bottleneck block for PartA^2.
Bottleneck block implemented with submanifold sparse convolution.
"""
spconv.SparseModule.__init__(self)
Bottleneck.__init__(
self,
inplanes,
planes,
stride=stride,
downsample=downsample,
conv_cfg=conv_cfg,
norm_cfg=norm_cfg)
def forward(self, x):
identity = x.features
out = self.conv1(x)
out.features = self.bn1(out.features)
out.features = self.relu(out.features)
out = self.conv2(out)
out.features = self.bn2(out.features)
out.features = self.relu(out.features)
out = self.conv3(out)
out.features = self.bn3(out.features)
if self.downsample is not None:
identity = self.downsample(x)
out.features += identity
out.features = self.relu(out.features)
return out
class SparseBasicBlock(BasicBlock, spconv.SparseModule):
expansion = 1
def __init__(self,
inplanes,
planes,
stride=1,
downsample=None,
conv_cfg=None,
norm_cfg=None):
"""Sparse basic block for PartA^2.
Sparse basic block implemented with submanifold sparse convolution.
"""
spconv.SparseModule.__init__(self)
BasicBlock.__init__(
self,
inplanes,
planes,
stride=stride,
downsample=downsample,
conv_cfg=conv_cfg,
norm_cfg=norm_cfg)
def forward(self, x):
identity = x.features
assert x.features.dim() == 2, f'x.features.dim()={x.features.dim()}'
out = self.conv1(x)
out.features = self.norm1(out.features)
out.features = self.relu(out.features)
out = self.conv2(out)
out.features = self.norm2(out.features)
if self.downsample is not None:
identity = self.downsample(x)
out.features += identity
out.features = self.relu(out.features)
return out
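A minimal sketch of driving SparseBasicBlock end to end, assuming spconv's SparseConvTensor API (features, indices, spatial_shape, batch_size) and the SubMConv3d registry entry added further below; shapes and configs are illustrative only.

import torch
import mmdet3d.ops.spconv as spconv

features = torch.randn(100, 16).cuda()           # (num_active, C)
coords = torch.zeros(100, 4, dtype=torch.int32)  # (num_active, 4): batch, z, y, x
coords[:, 1:] = torch.randint(0, 40, (100, 3), dtype=torch.int32)
x = spconv.SparseConvTensor(features, coords.cuda(), [41, 1600, 1408], batch_size=1)

block = SparseBasicBlock(
    16,
    16,
    conv_cfg=dict(type='SubMConv3d', indice_key='subm1'),
    norm_cfg=dict(type='BN1d', eps=1e-3, momentum=0.01)).cuda()
out = block(x)  # a SparseConvTensor; out.features has shape (num_active, 16)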
@@ -16,6 +16,7 @@ import math
import numpy as np
import torch
from mmcv.cnn import CONV_LAYERS
from torch.nn import init
from torch.nn.parameter import Parameter
@@ -205,6 +206,7 @@ class SparseConvolution(SparseModule):
return out_tensor
@CONV_LAYERS.register_module()
class SparseConv2d(SparseConvolution):
def __init__(self,
@@ -230,6 +232,7 @@ class SparseConv2d(SparseConvolution):
indice_key=indice_key)
@CONV_LAYERS.register_module()
class SparseConv3d(SparseConvolution):
def __init__(self,
@@ -255,6 +258,7 @@ class SparseConv3d(SparseConvolution):
indice_key=indice_key)
@CONV_LAYERS.register_module()
class SparseConv4d(SparseConvolution):
def __init__(self,
@@ -280,6 +284,7 @@ class SparseConv4d(SparseConvolution):
indice_key=indice_key)
@CONV_LAYERS.register_module()
class SparseConvTranspose2d(SparseConvolution):
def __init__(self,
@@ -306,6 +311,7 @@ class SparseConvTranspose2d(SparseConvolution):
indice_key=indice_key)
@CONV_LAYERS.register_module()
class SparseConvTranspose3d(SparseConvolution):
def __init__(self,
@@ -332,6 +338,7 @@ class SparseConvTranspose3d(SparseConvolution):
indice_key=indice_key)
@CONV_LAYERS.register_module()
class SparseInverseConv2d(SparseConvolution):
def __init__(self,
@@ -350,6 +357,7 @@ class SparseInverseConv2d(SparseConvolution):
indice_key=indice_key)
@CONV_LAYERS.register_module()
class SparseInverseConv3d(SparseConvolution):
def __init__(self,
@@ -368,6 +376,7 @@ class SparseInverseConv3d(SparseConvolution):
indice_key=indice_key)
@CONV_LAYERS.register_module()
class SubMConv2d(SparseConvolution):
def __init__(self,
@@ -394,6 +403,7 @@ class SubMConv2d(SparseConvolution):
indice_key=indice_key)
@CONV_LAYERS.register_module()
class SubMConv3d(SparseConvolution):
def __init__(self,
@@ -420,6 +430,7 @@ class SubMConv3d(SparseConvolution):
indice_key=indice_key)
@CONV_LAYERS.register_module()
class SubMConv4d(SparseConvolution):
def __init__(self,
......
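The point of the @CONV_LAYERS.register_module() decorators added throughout this file is that mmcv's build_conv_layer can then instantiate sparse convs from plain config dicts, which is how the sparse blocks above receive their conv_cfg. A sketch, assuming mmcv is installed:

from mmcv.cnn import build_conv_layer

conv = build_conv_layer(
    dict(type='SubMConv3d', indice_key='subm1'),
    16, 32, 3, padding=1, bias=False)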
@@ -34,7 +34,7 @@ torch::Tensor indiceMaxPool(torch::Tensor features, torch::Tensor indicePairs,
torch::Tensor output = torch::zeros({numAct, numInPlanes}, options);
double totalTime = 0;
for (int i = 0; i < kernelVolume; ++i) {
auto nHot = indicePairNumCpu.data_ptr<int>()[i];
if (nHot <= 0) {
continue;
}
@@ -60,7 +60,8 @@ torch::Tensor indiceMaxPool(torch::Tensor features, torch::Tensor indicePairs,
template <typename T>
torch::Tensor indiceMaxPoolBackward(torch::Tensor features,
torch::Tensor outFeatures,
torch::Tensor outGrad,
torch::Tensor indicePairs,
torch::Tensor indiceNum) {
auto device = features.device().type();
auto numInPlanes = features.size(1);
@@ -70,7 +71,7 @@ torch::Tensor indiceMaxPoolBackward(torch::Tensor features,
torch::Tensor inputGrad = torch::zeros(features.sizes(), options);
auto kernelVolume = indicePairs.size(0);
for (int i = 0; i < kernelVolume; ++i) {
auto nHot = indicePairNumCpu.data_ptr<int>()[i];
if (nHot <= 0) {
continue;
}
......
@@ -13,20 +13,21 @@
// limitations under the License.
#pragma once
#include <ATen/ATen.h>
#include <ATen/cuda/CUDAContext.h>
#include <tensorview/tensorview.h>
#include <torch/script.h>
namespace tv {
struct TorchGPU : public tv::GPU {
virtual cudaStream_t getStream() const override {
return at::cuda::getCurrentCUDAStream();
}
};
template <typename T>
void check_torch_dtype(const torch::Tensor &tensor) {
switch (tensor.type().scalarType()) {
case at::ScalarType::Double: {
auto val = std::is_same<std::remove_const_t<T>, double>::value;
@@ -65,6 +66,6 @@ tv::TensorView<T> torch2tv(const torch::Tensor &tensor) {
for (auto i : tensor.sizes()) {
shape.push_back(i);
}
return tv::TensorView<T>(tensor.data_ptr<std::remove_const_t<T>>(), shape);
}
} // namespace tv
@@ -6,7 +6,7 @@
#include <ATen/cuda/CUDAApplyUtils.cuh>
#define CHECK_CUDA(x) \
TORCH_CHECK(x.device().is_cuda(), #x " must be a CUDA tensor")
#define CHECK_CONTIGUOUS(x) \
TORCH_CHECK(x.is_contiguous(), #x " must be contiguous")
#define CHECK_INPUT(x) \
@@ -177,7 +177,7 @@ std::vector<at::Tensor> dynamic_point_to_voxel_forward_gpu(
dim3 threads(threadsPerBlock);
cudaStream_t map_stream = at::cuda::getCurrentCUDAStream();
AT_DISPATCH_ALL_TYPES(
voxel_mapping.scalar_type(), "determin_duplicate", ([&] {
point_to_voxelidx_kernel<int><<<blocks, threads, 0, map_stream>>>(
voxel_mapping.data_ptr<int>(), point_to_voxelidx.data_ptr<int>(),
point_to_pointidx.data_ptr<int>(), num_points, NDim);
@@ -203,7 +203,7 @@ std::vector<at::Tensor> dynamic_point_to_voxel_forward_gpu(
voxel_mapping.options()); // must be zero from the beginning
cudaStream_t logic_stream = at::cuda::getCurrentCUDAStream();
AT_DISPATCH_ALL_TYPES(
voxel_mapping.scalar_type(), "determin_duplicate", ([&] {
determin_voxel_num<int><<<1, 1, 0, logic_stream>>>(
voxel_mapping.data_ptr<int>(), num_points_per_voxel.data_ptr<int>(),
point_to_voxelidx.data_ptr<int>(),
@@ -228,7 +228,7 @@ std::vector<at::Tensor> dynamic_point_to_voxel_forward_gpu(
dim3 cp_threads(threadsPerBlock, 4);
cudaStream_t cp_stream = at::cuda::getCurrentCUDAStream();
AT_DISPATCH_ALL_TYPES(
points.scalar_type(), "scatter_point_to_voxel", ([&] {
scatter_point_to_voxel_kernel<float, int>
<<<blocks, cp_threads, 0, cp_stream>>>(
points.data_ptr<float>(), voxel_mapping.data_ptr<int>(),
@@ -265,8 +265,8 @@ void dynamic_point_to_voxel_backward_gpu(at::Tensor& grad_input_points,
dim3 blocks(col_blocks);
dim3 cp_threads(threadsPerBlock, 4);
cudaStream_t cp_stream = at::cuda::getCurrentCUDAStream();
AT_DISPATCH_ALL_TYPES(grad_input_points.scalar_type(),
"scatter_point_to_voxel", ([&] {
map_voxel_to_point_kernel<float, int>
<<<blocks, cp_threads, 0, cp_stream>>>(
grad_input_points.data_ptr<float>(),
......