Commit d6310a27 authored by traveller59's avatar traveller59
Browse files

add voxel filtering

parent 73427720
// Copyright 2019 Yan Yan
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef FUSED_SPARSE_CONV_OP_H_
#define FUSED_SPARSE_CONV_OP_H_
#include <cuda_runtime_api.h>
#include <spconv/indice.h>
#include <spconv/reordering.h>
#include <torch/script.h>
#include <torch_utils.h>
#include <utility/timer.h>
namespace spconv {
// torch.jit's doc says only support int64, so we need to convert to int32.
template <typename T>
torch::Tensor fusedIndiceConvBatchNorm(torch::Tensor features, torch::Tensor filters, torch::Tensor bias,
torch::Tensor indicePairs, torch::Tensor indiceNum,
int64_t numActOut, int64_t _inverse, int64_t _subM) {
bool subM = _subM != 0;
bool inverse = _inverse != 0;
auto device = features.device().type();
auto ndim = filters.dim() - 2;
auto kernelVolume = indicePairs.size(0);
auto numInPlanes = features.size(1);
auto numOutPlanes = filters.size(ndim + 1);
auto indicePairNumCpu = indiceNum.to({torch::kCPU});
auto indicePairMaxSizeIter = std::max_element(
indicePairNumCpu.data<int>(), indicePairNumCpu.data<int>() + kernelVolume);
int indicePairMaxOffset = indicePairMaxSizeIter - indicePairNumCpu.data<int>();
int indicePairMaxSize = *indicePairMaxSizeIter;
/*if (_subM){
std::vector<int> indicePairNumVec(indicePairNumCpu.data<int>(), indicePairNumCpu.data<int>() + kernelVolume);
indicePairNumVec.erase(indicePairNumVec.begin() + indicePairMaxOffset);
auto indicePairVecMaxSizeIter = std::max_element(
indicePairNumVec.begin(), indicePairNumVec.end());
indicePairMaxSize = *indicePairVecMaxSizeIter;
}*/
auto options =
torch::TensorOptions().dtype(features.dtype()).device(features.device());
// auto indicePairOptions =
// torch::TensorOptions().dtype(torch::kInt64).device(indicePairs.device());
torch::Tensor output = torch::zeros({numActOut, numOutPlanes}, options).copy_(bias);
torch::Tensor inputBuffer = torch::zeros({indicePairMaxSize, numInPlanes}, options);
torch::Tensor outputBuffer =
torch::zeros({indicePairMaxSize, numOutPlanes}, options);
filters = filters.view({-1, numInPlanes, numOutPlanes});
if (subM) { // the center index of subm conv don't need gather and scatter
// add.
torch::mm_out(output, features, filters[indicePairMaxOffset]);
}
double totalGatherTime = 0;
double totalGEMMTime = 0;
double totalSAddTime = 0;
for (int i = 0; i < kernelVolume; ++i) {
auto nHot = indicePairNumCpu.data<int>()[i];
if (nHot <= 0 || (subM && i == indicePairMaxOffset)) {
continue;
}
// auto timer = spconv::CudaContextTimer<>();
auto outputBufferBlob =
torch::from_blob(outputBuffer.data<T>(), {nHot, numOutPlanes}, options);
auto inputBufferBlob =
torch::from_blob(inputBuffer.data<T>(), {nHot, numInPlanes}, options);
if (device == torch::kCPU) {
functor::SparseGatherFunctor<tv::CPU, T, int> gatherFtor;
gatherFtor(tv::CPU(), tv::torch2tv<T>(inputBuffer),
tv::torch2tv<const T>(features),
tv::torch2tv<const int>(indicePairs).subview(i, inverse), nHot);
} else {
functor::SparseGatherFunctor<tv::GPU, T, int> gatherFtor;
gatherFtor(tv::TorchGPU(), tv::torch2tv<T>(inputBuffer),
tv::torch2tv<const T>(features),
tv::torch2tv<const int>(indicePairs).subview(i, inverse), nHot);
TV_CHECK_CUDA_ERR();
/* slower than SparseGatherFunctor, may due to int->long conversion
auto indicePairLong = indicePairs[i][inverse].to(torch::kInt64);
auto indicePairBlob = torch::from_blob(indicePairLong.data<long>(), {nHot},
indicePairOptions);
torch::index_select_out(inputBufferBlob, features, 0,
indicePairBlob);*/
}
// totalGatherTime += timer.report() / 1000.0;
torch::mm_out(outputBufferBlob, inputBufferBlob, filters[i]);
// totalGEMMTime += timer.report() / 1000.0;
if (device == torch::kCPU) {
functor::SparseScatterAddFunctor<tv::CPU, T, int> scatterFtor;
scatterFtor(tv::CPU(), tv::torch2tv<T>(output),
tv::torch2tv<const T>(outputBuffer),
tv::torch2tv<const int>(indicePairs).subview(i, !inverse), nHot,
true);
} else {
functor::SparseScatterAddFunctor<tv::GPU, T, int> scatterFtor;
scatterFtor(tv::TorchGPU(), tv::torch2tv<T>(output),
tv::torch2tv<const T>(outputBuffer),
tv::torch2tv<const int>(indicePairs).subview(i, !inverse), nHot,
true);
TV_CHECK_CUDA_ERR();
}
// totalSAddTime += timer.report() / 1000.0;
}
// std::cout << "gather time " << totalGatherTime << std::endl;
// std::cout << "gemm time " << totalGEMMTime << std::endl;
// std::cout << "scatteradd time " << totalSAddTime << std::endl;
return output;
}
} // namespace spconv
#endif
\ No newline at end of file
// Copyright 2019 Yan Yan
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef NMS_FUNCTOR_H_
#define NMS_FUNCTOR_H_
#include <tensorview/tensorview.h>
namespace spconv
{
namespace functor
{
template <typename Device, typename T, typename Index>
struct NonMaxSupressionFunctor
{
Index operator()(const Device& d, tv::TensorView<Index> keep,
tv::TensorView<const T> boxes,
T threshold, T eps);
};
template <typename Device, typename T, typename Index>
struct rotateNonMaxSupressionFunctor
{
Index operator()(const Device& d, tv::TensorView<Index> keep,
tv::TensorView<const T> boxCorners,
tv::TensorView<const T> standupIoU, T threshold);
};
} // namespace functor
} // namespace spconv
#endif
\ No newline at end of file
// Copyright 2019 Yan Yan
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef NMS_TORCH_OP_H_
#define NMS_TORCH_OP_H_
#include <cuda_runtime_api.h>
#include <spconv/indice.h>
#include <spconv/reordering.h>
#include <torch/script.h>
#include <torch_utils.h>
#include <utility/timer.h>
#include <spconv/nms_functor.h>
namespace spconv {
// torch.jit's doc says only support int64, so we need to convert to int32.
template <typename T>
torch::Tensor
nonMaxSuppression(torch::Tensor boxes, torch::Tensor scores, int64_t preMaxSize,
int64_t postMaxSize, double thresh, double eps) {
// auto timer = spconv::CudaContextTimer<>();
tv::check_torch_dtype<T>(boxes);
auto resOptions =
torch::TensorOptions().dtype(torch::kInt64).device(boxes.device());
if (boxes.size(0) == 0){
return torch::zeros({0}, resOptions);
}
torch::Tensor indices;
if (preMaxSize > 0){
auto numKeepedScores = scores.size(0);
preMaxSize = std::min(numKeepedScores, preMaxSize);
auto res = torch::topk(scores, preMaxSize);
indices = std::get<1>(res);
boxes = torch::index_select(boxes, 0, indices);
}else{
indices = std::get<1>(torch::sort(scores));
boxes = torch::index_select(boxes, 0, indices);
}
if (boxes.size(0) == 0)
return torch::zeros({0}, resOptions);
auto keep = torch::zeros({boxes.size(0)}, resOptions);
int64_t keepNum = 0;
if (boxes.device().type() == torch::kCPU) {
auto nmsFunctor = functor::NonMaxSupressionFunctor<tv::CPU, T, int64_t>();
keepNum = nmsFunctor(tv::CPU(), tv::torch2tv<int64_t>(keep),
tv::torch2tv<const T>(boxes), T(thresh), T(eps));
}else{
TV_ASSERT_RT_ERR(false, "not implemented");
}
if (postMaxSize <= 0){
postMaxSize = keepNum;
}
// std::cout << keep << std::endl;
keep = keep.slice(0, 0, std::min(keepNum, postMaxSize));
if (preMaxSize > 0){
return torch::index_select(indices, 0, keep);
}
return keep;
}
} // namespace spconv
#endif
\ No newline at end of file
...@@ -87,8 +87,328 @@ int points_to_voxel_3d_np(py::array_t<DType> points, py::array_t<DType> voxels, ...@@ -87,8 +87,328 @@ int points_to_voxel_3d_np(py::array_t<DType> points, py::array_t<DType> voxels,
} }
for (int i = 0; i < voxel_num; ++i) { for (int i = 0; i < voxel_num; ++i) {
coor_to_voxelidx_rw(coors_rw(i, 0), coors_rw(i, 1), coors_rw(i, 2)) = -1; coor_to_voxelidx_rw(coors_rw(i, 0), coors_rw(i, 1), coors_rw(i, 2)) = -1;
}
return voxel_num;
}
template <typename DType, int NDim>
int points_to_voxel_3d_np_mean(py::array_t<DType> points, py::array_t<DType> voxels,
py::array_t<DType> means,
py::array_t<int> coors,
py::array_t<int> num_points_per_voxel,
py::array_t<int> coor_to_voxelidx,
std::vector<DType> voxel_size,
std::vector<DType> coors_range, int max_points,
int max_voxels) {
auto points_rw = points.template mutable_unchecked<2>();
auto means_rw = means.template mutable_unchecked<2>();
auto voxels_rw = voxels.template mutable_unchecked<3>();
auto coors_rw = coors.mutable_unchecked<2>();
auto num_points_per_voxel_rw = num_points_per_voxel.mutable_unchecked<1>();
auto coor_to_voxelidx_rw = coor_to_voxelidx.mutable_unchecked<NDim>();
auto N = points_rw.shape(0);
auto num_features = points_rw.shape(1);
// auto ndim = points_rw.shape(1) - 1;
constexpr int ndim_minus_1 = NDim - 1;
int voxel_num = 0;
bool failed = false;
int coor[NDim];
int c;
int grid_size[NDim];
for (int i = 0; i < NDim; ++i) {
grid_size[i] =
round((coors_range[NDim + i] - coors_range[i]) / voxel_size[i]);
}
int voxelidx, num;
for (int i = 0; i < N; ++i) {
failed = false;
for (int j = 0; j < NDim; ++j) {
c = floor((points_rw(i, j) - coors_range[j]) / voxel_size[j]);
if ((c < 0 || c >= grid_size[j])) {
failed = true;
break;
}
coor[ndim_minus_1 - j] = c;
}
if (failed)
continue;
voxelidx = coor_to_voxelidx_rw(coor[0], coor[1], coor[2]);
if (voxelidx == -1) {
voxelidx = voxel_num;
if (voxel_num >= max_voxels)
break;
voxel_num += 1;
coor_to_voxelidx_rw(coor[0], coor[1], coor[2]) = voxelidx;
for (int k = 0; k < NDim; ++k) {
coors_rw(voxelidx, k) = coor[k];
}
}
num = num_points_per_voxel_rw(voxelidx);
if (num < max_points) {
for (int k = 0; k < num_features; ++k) {
voxels_rw(voxelidx, num, k) = points_rw(i, k);
}
num_points_per_voxel_rw(voxelidx) += 1;
for (int k = 0; k < num_features; ++k) {
means_rw(voxelidx, k) += (points_rw(i, k) - means_rw(voxelidx, k)) / DType(num + 1);
}
}
}
for (int i = 0; i < voxel_num; ++i) {
coor_to_voxelidx_rw(coors_rw(i, 0), coors_rw(i, 1), coors_rw(i, 2)) = -1;
num = num_points_per_voxel_rw(i);
for (int j = num; j < max_points; ++j){
for (int k = 0; k < num_features; ++k) {
voxels_rw(i, j, k) = means_rw(i, k);
}
}
}
return voxel_num;
}
template <typename DType, int NDim>
int points_to_voxel_3d_np_height(py::array_t<DType> points, py::array_t<DType> voxels,
py::array_t<DType> height,
py::array_t<DType> maxs,
py::array_t<int> coors,
py::array_t<int> num_points_per_voxel,
py::array_t<int> coor_to_voxelidx,
std::vector<DType> voxel_size,
std::vector<DType> coors_range, int max_points,
int max_voxels) {
auto points_rw = points.template mutable_unchecked<2>();
auto height_rw = height.template mutable_unchecked<2>();
auto maxs_rw = maxs.template mutable_unchecked<2>();
auto voxels_rw = voxels.template mutable_unchecked<3>();
auto coors_rw = coors.mutable_unchecked<2>();
auto num_points_per_voxel_rw = num_points_per_voxel.mutable_unchecked<1>();
auto coor_to_voxelidx_rw = coor_to_voxelidx.mutable_unchecked<NDim>();
auto N = points_rw.shape(0);
auto num_features = points_rw.shape(1);
// auto ndim = points_rw.shape(1) - 1;
constexpr int ndim_minus_1 = NDim - 1;
int voxel_num = 0;
bool failed = false;
int coor[NDim];
int c;
int grid_size[NDim];
for (int i = 0; i < NDim; ++i) {
grid_size[i] =
round((coors_range[NDim + i] - coors_range[i]) / voxel_size[i]);
}
int voxelidx, num;
for (int i = 0; i < N; ++i) {
failed = false;
for (int j = 0; j < NDim; ++j) {
c = floor((points_rw(i, j) - coors_range[j]) / voxel_size[j]);
if ((c < 0 || c >= grid_size[j])) {
failed = true;
break;
}
coor[ndim_minus_1 - j] = c;
}
if (failed)
continue;
voxelidx = coor_to_voxelidx_rw(coor[0], coor[1], coor[2]);
if (voxelidx == -1) {
voxelidx = voxel_num;
if (voxel_num >= max_voxels)
break;
voxel_num += 1;
coor_to_voxelidx_rw(coor[0], coor[1], coor[2]) = voxelidx;
for (int k = 0; k < NDim; ++k) {
coors_rw(voxelidx, k) = coor[k];
}
}
num = num_points_per_voxel_rw(voxelidx);
if (num < max_points) {
for (int k = 0; k < num_features; ++k) {
voxels_rw(voxelidx, num, k) = points_rw(i, k);
height_rw(voxelidx, k) = std::min(points_rw(i, k), height_rw(voxelidx, k));
maxs_rw(voxelidx, k) = std::max(points_rw(i, k), maxs_rw(voxelidx, k));
}
num_points_per_voxel_rw(voxelidx) += 1;
}
}
for (int i = 0; i < voxel_num; ++i) {
coor_to_voxelidx_rw(coors_rw(i, 0), coors_rw(i, 1), coors_rw(i, 2)) = -1;
for (int k = 0; k < num_features; ++k) {
height_rw(i, k) = maxs_rw(i, k) - height_rw(i, k);
}
}
return voxel_num;
}
template <typename DType, int NDim>
int block_filtering(py::array_t<DType> points,
py::array_t<int> mask,
py::array_t<DType> height,
py::array_t<DType> maxs,
py::array_t<int> coor_to_voxelidx,
std::vector<DType> voxel_size,
std::vector<DType> coors_range,
int max_voxels,
DType eps) {
auto points_rw = points.template mutable_unchecked<2>();
auto height_rw = height.template mutable_unchecked<1>();
auto maxs_rw = maxs.template mutable_unchecked<1>();
auto coor_to_voxelidx_rw = coor_to_voxelidx.mutable_unchecked<NDim>();
auto N = points_rw.shape(0);
auto num_features = points_rw.shape(1);
// auto ndim = points_rw.shape(1) - 1;
constexpr int ndim_minus_1 = NDim - 1;
int voxel_num = 0;
bool failed = false;
int coor[NDim];
int c;
int grid_size[NDim];
for (int i = 0; i < NDim; ++i) {
grid_size[i] =
round((coors_range[NDim + i] - coors_range[i]) / voxel_size[i]);
}
int voxelidx, num;
for (int i = 0; i < N; ++i) {
failed = false;
for (int j = 0; j < NDim; ++j) {
c = floor((points_rw(i, j) - coors_range[j]) / voxel_size[j]);
if ((c < 0 || c >= grid_size[j])) {
failed = true;
break;
}
coor[ndim_minus_1 - j] = c;
}
if (failed)
continue;
voxelidx = coor_to_voxelidx_rw(coor[0], coor[1], coor[2]);
if (voxelidx == -1) {
voxelidx = voxel_num;
voxel_num += 1;
coor_to_voxelidx_rw(coor[0], coor[1], coor[2]) = voxelidx;
}
height_rw(voxelidx) = std::min(points_rw(i, 2), height_rw(voxelidx));
maxs_rw(voxelidx) = std::max(points_rw(i, 2), maxs_rw(voxelidx));
}
for (int i = 0; i < N; ++i) {
failed = false;
for (int j = 0; j < NDim; ++j) {
c = floor((points_rw(i, j) - coors_range[j]) / voxel_size[j]);
if ((c < 0 || c >= grid_size[j])) {
failed = true;
break;
}
coor[ndim_minus_1 - j] = c;
}
if (failed)
continue;
voxelidx = coor_to_voxelidx_rw(coor[0], coor[1], coor[2]);
if ((maxs_rw(voxelidx) - height_rw(voxelidx, 2)) < eps){
mask(i) = 0;
}
}
}
template <typename DType, int NDim>
int points_to_voxel_3d_with_filtering(py::array_t<DType> points, py::array_t<DType> voxels,
py::array_t<int> voxel_mask,
py::array_t<DType> mins,
py::array_t<DType> maxs,
py::array_t<int> coors,
py::array_t<int> num_points_per_voxel,
py::array_t<int> coor_to_voxelidx,
std::vector<DType> voxel_size,
std::vector<DType> coors_range,
int max_points,
int max_voxels,
int block_factor,
int block_size,
DType height_threshold) {
auto points_rw = points.template mutable_unchecked<2>();
auto mins_rw = mins.template mutable_unchecked<2>();
auto maxs_rw = maxs.template mutable_unchecked<2>();
auto voxels_rw = voxels.template mutable_unchecked<3>();
auto voxel_mask_rw = voxel_mask.template mutable_unchecked<1>();
auto coors_rw = coors.mutable_unchecked<2>();
auto num_points_per_voxel_rw = num_points_per_voxel.mutable_unchecked<1>();
auto coor_to_voxelidx_rw = coor_to_voxelidx.mutable_unchecked<NDim>();
auto N = points_rw.shape(0);
auto num_features = points_rw.shape(1);
// auto ndim = points_rw.shape(1) - 1;
constexpr int ndim_minus_1 = NDim - 1;
int voxel_num = 0;
bool failed = false;
int coor[NDim];
int c;
int grid_size[NDim];
DType max_value, min_value;
for (int i = 0; i < NDim; ++i) {
grid_size[i] =
round((coors_range[NDim + i] - coors_range[i]) / voxel_size[i]);
}
int block_shape_H = grid_size[1] / block_factor;
int block_shape_W = grid_size[0] / block_factor;
int voxelidx, num;
int block_coor[2];
int startx, stopx, starty, stopy;
for (int i = 0; i < N; ++i) {
failed = false;
for (int j = 0; j < NDim; ++j) {
c = floor((points_rw(i, j) - coors_range[j]) / voxel_size[j]);
if ((c < 0 || c >= grid_size[j])) {
failed = true;
break;
}
coor[ndim_minus_1 - j] = c;
}
if (failed)
continue;
voxelidx = coor_to_voxelidx_rw(coor[0], coor[1], coor[2]);
if (voxelidx == -1) {
voxelidx = voxel_num;
if (voxel_num >= max_voxels)
break;
voxel_num += 1;
coor_to_voxelidx_rw(coor[0], coor[1], coor[2]) = voxelidx;
for (int k = 0; k < NDim; ++k) {
coors_rw(voxelidx, k) = coor[k];
}
}
num = num_points_per_voxel_rw(voxelidx);
if (num < max_points) {
for (int k = 0; k < num_features; ++k) {
voxels_rw(voxelidx, num, k) = points_rw(i, k);
}
block_coor[0] = coor[1] / block_factor;
block_coor[1] = coor[2] / block_factor;
mins_rw(block_coor[0], block_coor[1]) = std::min(points_rw(i, 2), mins_rw(block_coor[0], block_coor[1]));
maxs_rw(block_coor[0], block_coor[1]) = std::max(points_rw(i, 2), maxs_rw(block_coor[0], block_coor[1]));
num_points_per_voxel_rw(voxelidx) += 1;
}
}
for (int i = 0; i < voxel_num; ++i) {
coor[1] = coors_rw(i, 1);
coor[2] = coors_rw(i, 2);
coor_to_voxelidx_rw(coors_rw(i, 0), coor[1], coor[2]) = -1;
block_coor[0] = coor[1] / block_factor;
block_coor[1] = coor[2] / block_factor;
min_value = mins_rw(block_coor[0], block_coor[1]);
max_value = maxs_rw(block_coor[0], block_coor[1]);
startx = std::max(0, block_coor[0]-block_size/2);
stopx = std::min(block_shape_H, block_coor[0]+block_size-block_size/2);
starty = std::max(0, block_coor[1]-block_size/2);
stopy = std::min(block_shape_W, block_coor[1]+block_size-block_size/2);
for (int j = startx; j < stopx; ++j){
for (int k = starty; k < stopy; ++k){
min_value = std::min(min_value, mins_rw(j, k));
max_value = std::max(max_value, maxs_rw(j, k));
}
}
voxel_mask_rw(i) = (max_value - min_value) > height_threshold;
} }
return voxel_num; return voxel_num;
} }
} // namespace spconv } // namespace spconv
\ No newline at end of file
...@@ -24,6 +24,8 @@ from spconv.conv import SparseInverseConv2d, SparseInverseConv3d ...@@ -24,6 +24,8 @@ from spconv.conv import SparseInverseConv2d, SparseInverseConv3d
from spconv.modules import SparseModule, SparseSequential from spconv.modules import SparseModule, SparseSequential
from spconv.pool import SparseMaxPool2d, SparseMaxPool3d from spconv.pool import SparseMaxPool2d, SparseMaxPool3d
from spconv import ops
_LIB_FILE_NAME = "libspconv.so" _LIB_FILE_NAME = "libspconv.so"
if platform.system() == "Windows": if platform.system() == "Windows":
_LIB_FILE_NAME = "spconv.dll" _LIB_FILE_NAME = "spconv.dll"
......
# Copyright 2019 Yan Yan # Copyright 2019 Yan Yan
# #
# Licensed under the Apache License, Version 2.0 (the "License"); # Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License. # you may not use this file except in compliance with the License.
# You may obtain a copy of the License at # You may obtain a copy of the License at
# #
# http://www.apache.org/licenses/LICENSE-2.0 # http://www.apache.org/licenses/LICENSE-2.0
# #
# Unless required by applicable law or agreed to in writing, software # Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, # distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
...@@ -63,7 +63,8 @@ class SparseConvolution(SparseModule): ...@@ -63,7 +63,8 @@ class SparseConvolution(SparseModule):
output_padding=0, output_padding=0,
transposed=False, transposed=False,
inverse=False, inverse=False,
indice_key=None): indice_key=None,
fused_bn=False):
super(SparseConvolution, self).__init__() super(SparseConvolution, self).__init__()
assert groups == 1 assert groups == 1
if not isinstance(kernel_size, (list, tuple)): if not isinstance(kernel_size, (list, tuple)):
...@@ -94,6 +95,7 @@ class SparseConvolution(SparseModule): ...@@ -94,6 +95,7 @@ class SparseConvolution(SparseModule):
self.groups = groups self.groups = groups
self.subm = subm self.subm = subm
self.indice_key = indice_key self.indice_key = indice_key
self.fused_bn = fused_bn
self.weight = Parameter( self.weight = Parameter(
torch.Tensor(*kernel_size, in_channels, out_channels)) torch.Tensor(*kernel_size, in_channels, out_channels))
...@@ -121,10 +123,12 @@ class SparseConvolution(SparseModule): ...@@ -121,10 +123,12 @@ class SparseConvolution(SparseModule):
if not self.subm: if not self.subm:
if self.transposed: if self.transposed:
out_spatial_shape = ops.get_deconv_output_size( out_spatial_shape = ops.get_deconv_output_size(
spatial_shape, self.kernel_size, self.stride, self.padding, self.dilation, self.output_padding) spatial_shape, self.kernel_size, self.stride, self.padding,
self.dilation, self.output_padding)
else: else:
out_spatial_shape = ops.get_conv_output_size( out_spatial_shape = ops.get_conv_output_size(
spatial_shape, self.kernel_size, self.stride, self.padding, self.dilation) spatial_shape, self.kernel_size, self.stride, self.padding,
self.dilation)
else: else:
out_spatial_shape = spatial_shape out_spatial_shape = spatial_shape
...@@ -136,8 +140,8 @@ class SparseConvolution(SparseModule): ...@@ -136,8 +140,8 @@ class SparseConvolution(SparseModule):
self.weight.view(self.in_channels, self.out_channels)) self.weight.view(self.in_channels, self.out_channels))
if self.bias is not None: if self.bias is not None:
features += self.bias features += self.bias
out_tensor = spconv.SparseConvTensor(features, input.indices, out_tensor = spconv.SparseConvTensor(
input.spatial_shape, input.batch_size) features, input.indices, input.spatial_shape, input.batch_size)
out_tensor.indice_dict = input.indice_dict out_tensor.indice_dict = input.indice_dict
out_tensor.grid = input.grid out_tensor.grid = input.grid
return out_tensor return out_tensor
...@@ -145,32 +149,54 @@ class SparseConvolution(SparseModule): ...@@ -145,32 +149,54 @@ class SparseConvolution(SparseModule):
if self.inverse: if self.inverse:
assert datas is not None and self.indice_key is not None assert datas is not None and self.indice_key is not None
_, outids, indice_pairs, indice_pair_num, out_spatial_shape = datas _, outids, indice_pairs, indice_pair_num, out_spatial_shape = datas
assert indice_pairs.shape[0] == np.prod(self.kernel_size), "inverse conv must have same kernel size as its couple conv" assert indice_pairs.shape[0] == np.prod(
self.kernel_size
), "inverse conv must have same kernel size as its couple conv"
else: else:
if self.indice_key is not None and datas is not None: if self.indice_key is not None and datas is not None:
outids, _, indice_pairs, indice_pair_num, _ = datas outids, _, indice_pairs, indice_pair_num, _ = datas
else: else:
outids, indice_pairs, indice_pair_num = ops.get_indice_pairs( outids, indice_pairs, indice_pair_num = ops.get_indice_pairs(
indices, batch_size, spatial_shape, self.kernel_size, indices,
self.stride, self.padding, self.dilation, self.output_padding, self.subm, self.transposed, grid=input.grid) batch_size,
input.indice_dict[self.indice_key] = (outids, indices, indice_pairs, indice_pair_num, spatial_shape) spatial_shape,
if self.subm: self.kernel_size,
out_features = Fsp.indice_subm_conv(features, self.weight, self.stride,
indice_pairs.to(device), self.padding,
indice_pair_num, self.dilation,
outids.shape[0]) self.output_padding,
self.subm,
self.transposed,
grid=input.grid)
input.indice_dict[self.indice_key] = (outids, indices,
indice_pairs,
indice_pair_num,
spatial_shape)
if self.fused_bn:
assert self.bias is not None
out_features = ops.fused_indice_conv(features, self.weight, self.bias, indice_pairs.to(device),
indice_pair_num,
outids.shape[0], self.inverse, self.subm)
else: else:
if self.inverse: if self.subm:
out_features = Fsp.indice_inverse_conv(features, out_features = Fsp.indice_subm_conv(features, self.weight,
self.weight, indice_pairs.to(device), indice_pairs.to(device),
indice_pair_num, outids.shape[0]) indice_pair_num,
outids.shape[0])
else: else:
out_features = Fsp.indice_conv(features, if self.inverse:
self.weight, indice_pairs.to(device), out_features = Fsp.indice_inverse_conv(features, self.weight,
indice_pair_num, outids.shape[0]) indice_pairs.to(device),
indice_pair_num,
outids.shape[0])
else:
out_features = Fsp.indice_conv(features, self.weight,
indice_pairs.to(device),
indice_pair_num,
outids.shape[0])
if self.bias is not None: if self.bias is not None:
out_features += self.bias out_features += self.bias
out_tensor = spconv.SparseConvTensor(out_features, outids, out_tensor = spconv.SparseConvTensor(out_features, outids,
out_spatial_shape, batch_size) out_spatial_shape, batch_size)
out_tensor.indice_dict = input.indice_dict out_tensor.indice_dict = input.indice_dict
...@@ -225,6 +251,30 @@ class SparseConv3d(SparseConvolution): ...@@ -225,6 +251,30 @@ class SparseConv3d(SparseConvolution):
bias, bias,
indice_key=indice_key) indice_key=indice_key)
class SparseConv4d(SparseConvolution):
def __init__(self,
in_channels,
out_channels,
kernel_size,
stride=1,
padding=0,
dilation=1,
groups=1,
bias=True,
indice_key=None):
super(SparseConv4d, self).__init__(
4,
in_channels,
out_channels,
kernel_size,
stride,
padding,
dilation,
groups,
bias,
indice_key=indice_key)
class SparseConvTranspose2d(SparseConvolution): class SparseConvTranspose2d(SparseConvolution):
def __init__(self, def __init__(self,
in_channels, in_channels,
...@@ -260,7 +310,6 @@ class SparseConvTranspose3d(SparseConvolution): ...@@ -260,7 +310,6 @@ class SparseConvTranspose3d(SparseConvolution):
dilation=1, dilation=1,
groups=1, groups=1,
bias=True, bias=True,
indice_key=None): indice_key=None):
super(SparseConvTranspose3d, self).__init__( super(SparseConvTranspose3d, self).__init__(
3, 3,
...@@ -275,6 +324,7 @@ class SparseConvTranspose3d(SparseConvolution): ...@@ -275,6 +324,7 @@ class SparseConvTranspose3d(SparseConvolution):
transposed=True, transposed=True,
indice_key=indice_key) indice_key=indice_key)
class SparseInverseConv2d(SparseConvolution): class SparseInverseConv2d(SparseConvolution):
def __init__(self, def __init__(self,
in_channels, in_channels,
...@@ -357,3 +407,27 @@ class SubMConv3d(SparseConvolution): ...@@ -357,3 +407,27 @@ class SubMConv3d(SparseConvolution):
bias, bias,
True, True,
indice_key=indice_key) indice_key=indice_key)
class SubMConv4d(SparseConvolution):
def __init__(self,
in_channels,
out_channels,
kernel_size,
stride=1,
padding=0,
dilation=1,
groups=1,
bias=True,
indice_key=None):
super(SubMConv4d, self).__init__(
4,
in_channels,
out_channels,
kernel_size,
stride,
padding,
dilation,
groups,
bias,
True,
indice_key=indice_key)
...@@ -111,7 +111,6 @@ class SparseMaxPoolFunction(Function): ...@@ -111,7 +111,6 @@ class SparseMaxPoolFunction(Function):
input_bp = ops.indice_maxpool_backward(features, out, grad_output, indice_pairs, indice_pair_num) input_bp = ops.indice_maxpool_backward(features, out, grad_output, indice_pairs, indice_pair_num)
return input_bp, None, None, None return input_bp, None, None, None
indice_conv = SparseConvFunction.apply indice_conv = SparseConvFunction.apply
indice_inverse_conv = SparseInverseConvFunction.apply indice_inverse_conv = SparseInverseConvFunction.apply
indice_subm_conv = SubMConvFunction.apply indice_subm_conv = SubMConvFunction.apply
......
# Copyright 2019 Yan Yan # Copyright 2019 Yan Yan
# #
# Licensed under the Apache License, Version 2.0 (the "License"); # Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License. # you may not use this file except in compliance with the License.
# You may obtain a copy of the License at # You may obtain a copy of the License at
# #
# http://www.apache.org/licenses/LICENSE-2.0 # http://www.apache.org/licenses/LICENSE-2.0
# #
# Unless required by applicable law or agreed to in writing, software # Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, # distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
...@@ -17,12 +17,19 @@ from collections import OrderedDict ...@@ -17,12 +17,19 @@ from collections import OrderedDict
import spconv import spconv
import torch import torch
from torch import nn from torch import nn
import time import time
def is_spconv_module(module): def is_spconv_module(module):
spconv_modules = (SparseModule,) spconv_modules = (SparseModule, )
return isinstance(module, spconv_modules) return isinstance(module, spconv_modules)
def is_sparse_conv(module):
from spconv.conv import SparseConvolution
return isinstance(module, SparseConvolution)
def _mean_update(vals, m_vals, t): def _mean_update(vals, m_vals, t):
outputs = [] outputs = []
if not isinstance(vals, list): if not isinstance(vals, list):
...@@ -104,7 +111,7 @@ class SparseSequential(SparseModule): ...@@ -104,7 +111,7 @@ class SparseSequential(SparseModule):
def __len__(self): def __len__(self):
return len(self._modules) return len(self._modules)
@property @property
def sparity_dict(self): def sparity_dict(self):
return self._sparity_dict return self._sparity_dict
...@@ -117,7 +124,7 @@ class SparseSequential(SparseModule): ...@@ -117,7 +124,7 @@ class SparseSequential(SparseModule):
def forward(self, input): def forward(self, input):
for k, module in self._modules.items(): for k, module in self._modules.items():
if is_spconv_module(module): # use SpConvTensor as input if is_spconv_module(module): # use SpConvTensor as input
assert isinstance(input, spconv.SparseConvTensor) assert isinstance(input, spconv.SparseConvTensor)
self._sparity_dict[k] = input.sparity self._sparity_dict[k] = input.sparity
input = module(input) input = module(input)
...@@ -128,3 +135,50 @@ class SparseSequential(SparseModule): ...@@ -128,3 +135,50 @@ class SparseSequential(SparseModule):
else: else:
input = module(input) input = module(input)
return input return input
def fused(self):
"""don't use this. no effect.
"""
from spconv.conv import SparseConvolution
mods = [v for k, v in self._modules.items()]
fused_mods = []
idx = 0
while idx < len(mods):
if is_sparse_conv(mods[idx]):
if idx < len(mods) - 1 and isinstance(mods[idx + 1], nn.BatchNorm1d):
new_module = SparseConvolution(
ndim=mods[idx].ndim,
in_channels=mods[idx].in_channels,
out_channels=mods[idx].out_channels,
kernel_size=mods[idx].kernel_size,
stride=mods[idx].stride,
padding=mods[idx].padding,
dilation=mods[idx].dilation,
groups=mods[idx].groups,
bias=True,
subm=mods[idx].subm,
output_padding=mods[idx].output_padding,
transposed=mods[idx].transposed,
inverse=mods[idx].inverse,
indice_key=mods[idx].indice_key,
fused_bn=True,
)
new_module.load_state_dict(mods[idx].state_dict(), False)
new_module.to(mods[idx].weight.device)
conv = new_module
bn = mods[idx + 1]
conv.bias.data.zero_()
conv.weight.data[:] = conv.weight.data * bn.weight.data / (
torch.sqrt(bn.running_var) + bn.eps)
conv.bias.data[:] = (
conv.bias.data - bn.running_mean) * bn.weight.data / (
torch.sqrt(bn.running_var) + bn.eps) + bn.bias.data
fused_mods.append(conv)
idx += 2
else:
fused_mods.append(mods[idx])
idx += 1
else:
fused_mods.append(mods[idx])
idx += 1
return SparseSequential(*fused_mods)
...@@ -83,6 +83,8 @@ def get_indice_pairs(indices, ...@@ -83,6 +83,8 @@ def get_indice_pairs(indices,
get_indice_pairs_func = torch.ops.spconv.get_indice_pairs_2d get_indice_pairs_func = torch.ops.spconv.get_indice_pairs_2d
elif ndim == 3: elif ndim == 3:
get_indice_pairs_func = torch.ops.spconv.get_indice_pairs_3d get_indice_pairs_func = torch.ops.spconv.get_indice_pairs_3d
elif ndim == 4:
get_indice_pairs_func = torch.ops.spconv.get_indice_pairs_4d
else: else:
raise NotImplementedError raise NotImplementedError
return get_indice_pairs_func(indices, batch_size, out_shape, spatial_shape, ksize, return get_indice_pairs_func(indices, batch_size, out_shape, spatial_shape, ksize,
...@@ -117,6 +119,21 @@ def indice_conv(features, ...@@ -117,6 +119,21 @@ def indice_conv(features,
else: else:
raise NotImplementedError raise NotImplementedError
def fused_indice_conv(features, filters, bias,
indice_pairs,
indice_pair_num,
num_activate_out, inverse, subm):
if features.dtype == torch.half:
func = torch.ops.spconv.fused_indice_conv_half
elif filters.dtype == torch.float32:
func = torch.ops.spconv.fused_indice_conv_fp32
else:
raise NotImplementedError
return func(features, filters, bias, indice_pairs,
indice_pair_num, num_activate_out,
int(inverse), int(subm))
def indice_conv_backward(features, def indice_conv_backward(features,
filters, filters,
...@@ -155,3 +172,8 @@ def indice_maxpool_backward(features, out_features, out_bp, indice_pairs, indice ...@@ -155,3 +172,8 @@ def indice_maxpool_backward(features, out_features, out_bp, indice_pairs, indice
features, out_features, out_bp, indice_pairs, indice_pair_num) features, out_features, out_bp, indice_pairs, indice_pair_num)
else: else:
raise NotImplementedError raise NotImplementedError
def nms(boxes, scores, pre_max_size, post_max_size, thresh, eps):
res = torch.ops.spconv.nms(
boxes, scores, pre_max_size, post_max_size, thresh, eps)
return res
# Copyright 2019 Yan Yan # Copyright 2019 Yan Yan
# #
# Licensed under the Apache License, Version 2.0 (the "License"); # Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License. # you may not use this file except in compliance with the License.
# You may obtain a copy of the License at # You may obtain a copy of the License at
# #
# http://www.apache.org/licenses/LICENSE-2.0 # http://www.apache.org/licenses/LICENSE-2.0
# #
# Unless required by applicable law or agreed to in writing, software # Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, # distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
...@@ -14,16 +14,26 @@ ...@@ -14,16 +14,26 @@
import numpy as np import numpy as np
from spconv import spconv_utils from spconv import spconv_utils
from spconv.spconv_utils import (non_max_suppression, non_max_suppression_cpu, from spconv.spconv_utils import (
points_to_voxel_3d_np, rbbox_iou, non_max_suppression, non_max_suppression_cpu, points_to_voxel_3d_np,
rotate_non_max_suppression_cpu, rbbox_intersection) rbbox_iou, points_to_voxel_3d_np_mean, points_to_voxel_3d_np_height,
points_to_voxel_3d_with_filtering, rotate_non_max_suppression_cpu,
rbbox_intersection)
def points_to_voxel(points, def points_to_voxel(points,
voxel_size, voxel_size,
coors_range, coors_range,
coor_to_voxelidx, coor_to_voxelidx,
max_points=35, max_points=35,
max_voxels=20000): max_voxels=20000,
full_mean=False,
with_height=False,
block_filtering=True,
block_factor=1,
block_size=8,
height_threshold=0.2,
pad_output=False):
"""convert 3d points(N, >=3) to voxels. This version calculate """convert 3d points(N, >=3) to voxels. This version calculate
everything in one loop. now it takes only 0.8ms(~6k voxels) everything in one loop. now it takes only 0.8ms(~6k voxels)
with c++ and 3.2ghz cpu. with c++ and 3.2ghz cpu.
...@@ -39,12 +49,18 @@ def points_to_voxel(points, ...@@ -39,12 +49,18 @@ def points_to_voxel(points,
max_voxels: int. indicate maximum voxels this function create. max_voxels: int. indicate maximum voxels this function create.
for voxelnet, 20000 is a good choice. you should shuffle points for voxelnet, 20000 is a good choice. you should shuffle points
before call this function because max_voxels may drop some points. before call this function because max_voxels may drop some points.
full_mean: bool. if true, all empty points in voxel will be filled with mean
of exist points.
with_height: bool. don't use this.
block_filtering: filter voxels by height. used for lidar point cloud.
use some visualization tool to see filtered result.
Returns: Returns:
voxels: [M, max_points, ndim] float tensor. only contain points. voxels: [M, max_points, ndim] float tensor. only contain points.
coordinates: [M, 3] int32 tensor. zyx format. coordinates: [M, 3] int32 tensor. zyx format.
num_points_per_voxel: [M] int32 tensor. num_points_per_voxel: [M] int32 tensor.
""" """
if full_mean:
assert block_filtering is False
if not isinstance(voxel_size, np.ndarray): if not isinstance(voxel_size, np.ndarray):
voxel_size = np.array(voxel_size, dtype=points.dtype) voxel_size = np.array(voxel_size, dtype=points.dtype)
if not isinstance(coors_range, np.ndarray): if not isinstance(coors_range, np.ndarray):
...@@ -56,20 +72,76 @@ def points_to_voxel(points, ...@@ -56,20 +72,76 @@ def points_to_voxel(points,
voxels = np.zeros( voxels = np.zeros(
shape=(max_voxels, max_points, points.shape[-1]), dtype=points.dtype) shape=(max_voxels, max_points, points.shape[-1]), dtype=points.dtype)
coors = np.zeros(shape=(max_voxels, 3), dtype=np.int32) coors = np.zeros(shape=(max_voxels, 3), dtype=np.int32)
voxel_num = points_to_voxel_3d_np( res = {
points, voxels, coors, num_points_per_voxel, coor_to_voxelidx, "voxels": voxels,
voxel_size.tolist(), coors_range.tolist(), max_points, max_voxels) "coordinates": coors,
# coors = coors[:voxel_num] "num_points_per_voxel": num_points_per_voxel,
# voxels = voxels[:voxel_num] }
# num_points_per_voxel = num_points_per_voxel[:voxel_num] if full_mean:
return voxels, coors, num_points_per_voxel, voxel_num means = np.zeros(
shape=(max_voxels, points.shape[-1]), dtype=points.dtype)
voxel_num = points_to_voxel_3d_np_mean(
points, voxels, means, coors,
num_points_per_voxel, coor_to_voxelidx, voxel_size.tolist(),
coors_range.tolist(), max_points, max_voxels)
else:
if with_height:
heights = np.zeros(
shape=(max_voxels, points.shape[-1]), dtype=points.dtype)
maxs = np.zeros(
shape=(max_voxels, points.shape[-1]), dtype=points.dtype)
res["heights"] = heights
voxel_num = points_to_voxel_3d_np_height(
points, voxels, heights, maxs, coors,
num_points_per_voxel, coor_to_voxelidx, voxel_size.tolist(),
coors_range.tolist(), max_points, max_voxels)
else:
if block_filtering:
block_shape = [*voxelmap_shape[1:]]
block_shape = [b // block_factor for b in block_shape]
mins = np.full(block_shape, 99999999, dtype=points.dtype)
maxs = np.full(block_shape, -99999999, dtype=points.dtype)
voxel_mask = np.zeros((max_voxels, ), dtype=np.int32)
voxel_num = points_to_voxel_3d_with_filtering(
points, voxels, voxel_mask, mins, maxs,
coors, num_points_per_voxel, coor_to_voxelidx,
voxel_size.tolist(), coors_range.tolist(), max_points,
max_voxels, block_factor, block_size, height_threshold)
voxel_mask = voxel_mask.astype(np.bool_)
coors_ = coors[voxel_mask]
if pad_output:
res["coordinates"][:voxel_num] = coors_
res["voxels"][:voxel_num] = voxels[voxel_mask]
res["num_points_per_voxel"][:
voxel_num] = num_points_per_voxel[
voxel_mask]
res["coordinates"][voxel_num:] = 0
res["voxels"][voxel_num:] = 0
res["num_points_per_voxel"][voxel_num:] = 0
else:
res["coordinates"] = coors_
res["voxels"] = voxels[voxel_mask]
res["num_points_per_voxel"] = num_points_per_voxel[
voxel_mask]
voxel_num = coors_.shape[0]
else:
voxel_num = points_to_voxel_3d_np(points, voxels, coors,
num_points_per_voxel,
coor_to_voxelidx,
voxel_size.tolist(),
coors_range.tolist(),
max_points, max_voxels)
res["voxel_num"] = voxel_num
return res
class VoxelGenerator: class VoxelGenerator:
def __init__(self, def __init__(self,
voxel_size, voxel_size,
point_cloud_range, point_cloud_range,
max_num_points, max_num_points,
max_voxels=20000): max_voxels=20000,
full_mean=True):
point_cloud_range = np.array(point_cloud_range, dtype=np.float32) point_cloud_range = np.array(point_cloud_range, dtype=np.float32)
# [0, -40, -3, 70.4, 40, 1] # [0, -40, -3, 70.4, 40, 1]
voxel_size = np.array(voxel_size, dtype=np.float32) voxel_size = np.array(voxel_size, dtype=np.float32)
...@@ -85,12 +157,17 @@ class VoxelGenerator: ...@@ -85,12 +157,17 @@ class VoxelGenerator:
self._max_num_points = max_num_points self._max_num_points = max_num_points
self._max_voxels = max_voxels self._max_voxels = max_voxels
self._grid_size = grid_size self._grid_size = grid_size
self._full_mean = full_mean
def generate(self, points, max_voxels=None): def generate(self, points, max_voxels=None):
res = points_to_voxel( res = points_to_voxel(points, self._voxel_size,
points, self._voxel_size, self._point_cloud_range, self._coor_to_voxelidx, self._point_cloud_range, self._coor_to_voxelidx,
self._max_num_points, max_voxels or self._max_voxels) self._max_num_points, max_voxels
voxels, coors, num_points_per_voxel, voxel_num = res or self._max_voxels, self._full_mean)
voxels = res["voxels"]
coors = res["coordinates"]
num_points_per_voxel = res["num_points_per_voxel"]
voxel_num = res["voxel_num"]
coors = coors[:voxel_num] coors = coors[:voxel_num]
voxels = voxels[:voxel_num] voxels = voxels[:voxel_num]
num_points_per_voxel = num_points_per_voxel[:voxel_num] num_points_per_voxel = num_points_per_voxel[:voxel_num]
...@@ -98,11 +175,99 @@ class VoxelGenerator: ...@@ -98,11 +175,99 @@ class VoxelGenerator:
return (voxels, coors, num_points_per_voxel) return (voxels, coors, num_points_per_voxel)
def generate_multi_gpu(self, points, max_voxels=None): def generate_multi_gpu(self, points, max_voxels=None):
res = points_to_voxel(points, self._voxel_size,
self._point_cloud_range, self._coor_to_voxelidx,
self._max_num_points, max_voxels
or self._max_voxels, self._full_mean)
voxels = res["voxels"]
coors = res["coordinates"]
num_points_per_voxel = res["num_points_per_voxel"]
voxel_num = res["voxel_num"]
return (voxels, coors, num_points_per_voxel)
@property
def voxel_size(self):
return self._voxel_size
@property
def max_num_points_per_voxel(self):
return self._max_num_points
@property
def point_cloud_range(self):
return self._point_cloud_range
@property
def grid_size(self):
return self._grid_size
class VoxelGeneratorV2:
def __init__(self,
voxel_size,
point_cloud_range,
max_num_points,
max_voxels=20000,
full_mean=False,
with_height=False,
block_filtering=False,
block_factor=8,
block_size=3,
height_threshold=0.1):
assert with_height is False, "don't use this."
assert full_mean is False, "don't use this."
point_cloud_range = np.array(point_cloud_range, dtype=np.float32)
# [0, -40, -3, 70.4, 40, 1]
voxel_size = np.array(voxel_size, dtype=np.float32)
grid_size = (
point_cloud_range[3:] - point_cloud_range[:3]) / voxel_size
grid_size = np.round(grid_size).astype(np.int64)
voxelmap_shape = tuple(np.round(grid_size).astype(np.int32).tolist())
voxelmap_shape = voxelmap_shape[::-1]
assert grid_size[0] % block_factor == 0
assert grid_size[1] % block_factor == 0
self._coor_to_voxelidx = np.full(voxelmap_shape, -1, dtype=np.int32)
self._voxel_size = voxel_size
self._point_cloud_range = point_cloud_range
self._max_num_points = max_num_points
self._max_voxels = max_voxels
self._grid_size = grid_size
self._full_mean = full_mean
self._with_height = with_height
self._block_filtering = block_filtering
self._block_factor = block_factor
self._height_threshold = height_threshold
self._block_size = block_size
assert block_size > 0
def generate(self, points, max_voxels=None):
res = points_to_voxel( res = points_to_voxel(
points, self._voxel_size, self._point_cloud_range, self._coor_to_voxelidx, points, self._voxel_size, self._point_cloud_range,
self._max_num_points, max_voxels or self._max_voxels) self._coor_to_voxelidx, self._max_num_points, max_voxels
or self._max_voxels, self._full_mean, self._with_height,
self._block_filtering, self._block_factor, self._block_size,
self._height_threshold)
for k, v in res.items():
if k != "voxel_num":
res[k] = v[:res["voxel_num"]]
return res return res
def generate_multi_gpu(self, points, max_voxels=None):
res = points_to_voxel(
points,
self._voxel_size,
self._point_cloud_range,
self._coor_to_voxelidx,
self._max_num_points,
max_voxels or self._max_voxels,
self._full_mean,
self._with_height,
self._block_filtering,
self._block_factor,
self._block_size,
self._height_threshold,
pad_output=True)
return res
@property @property
def voxel_size(self): def voxel_size(self):
...@@ -112,7 +277,6 @@ class VoxelGenerator: ...@@ -112,7 +277,6 @@ class VoxelGenerator:
def max_num_points_per_voxel(self): def max_num_points_per_voxel(self):
return self._max_num_points return self._max_num_points
@property @property
def point_cloud_range(self): def point_cloud_range(self):
return self._point_cloud_range return self._point_cloud_range
......
add_library(spconv SHARED all.cc indice.cc indice.cu add_library(spconv SHARED all.cc indice.cc indice.cu
reordering.cc reordering.cu maxpool.cc maxpool.cu) reordering.cc reordering.cu maxpool.cc maxpool.cu nms.cc)
target_include_directories(spconv PRIVATE ${ALL_INCLUDE} ) target_include_directories(spconv PRIVATE ${ALL_INCLUDE} )
set_property(TARGET spconv PROPERTY CUDA_STANDARD 14) set_property(TARGET spconv PROPERTY CUDA_STANDARD 14)
......
...@@ -15,10 +15,13 @@ ...@@ -15,10 +15,13 @@
#include <cuda_runtime_api.h> #include <cuda_runtime_api.h>
#include <spconv/pool_ops.h> #include <spconv/pool_ops.h>
#include <spconv/spconv_ops.h> #include <spconv/spconv_ops.h>
#include <spconv/fused_spconv_ops.h>
#include <spconv/nms_ops.h>
static auto registry = static auto registry =
torch::jit::RegisterOperators("spconv::get_indice_pairs_2d", &spconv::getIndicePair<2>) torch::jit::RegisterOperators("spconv::get_indice_pairs_2d", &spconv::getIndicePair<2>)
.op("spconv::get_indice_pairs_3d", &spconv::getIndicePair<3>) .op("spconv::get_indice_pairs_3d", &spconv::getIndicePair<3>)
.op("spconv::get_indice_pairs_4d", &spconv::getIndicePair<4>)
.op("spconv::get_indice_pairs_grid_2d", &spconv::getIndicePairPreGrid<2>) .op("spconv::get_indice_pairs_grid_2d", &spconv::getIndicePairPreGrid<2>)
.op("spconv::get_indice_pairs_grid_3d", &spconv::getIndicePairPreGrid<3>) .op("spconv::get_indice_pairs_grid_3d", &spconv::getIndicePairPreGrid<3>)
.op("spconv::indice_conv_fp32", &spconv::indiceConv<float>) .op("spconv::indice_conv_fp32", &spconv::indiceConv<float>)
...@@ -26,9 +29,12 @@ static auto registry = ...@@ -26,9 +29,12 @@ static auto registry =
.op("spconv::indice_conv_half", &spconv::indiceConv<at::Half>) .op("spconv::indice_conv_half", &spconv::indiceConv<at::Half>)
.op("spconv::indice_conv_backward_half", .op("spconv::indice_conv_backward_half",
&spconv::indiceConvBackward<at::Half>) &spconv::indiceConvBackward<at::Half>)
.op("spconv::fused_indice_conv_fp32", &spconv::fusedIndiceConvBatchNorm<float>)
.op("spconv::fused_indice_conv_half", &spconv::fusedIndiceConvBatchNorm<at::Half>)
.op("spconv::indice_maxpool_fp32", &spconv::indiceMaxPool<float>) .op("spconv::indice_maxpool_fp32", &spconv::indiceMaxPool<float>)
.op("spconv::indice_maxpool_backward_fp32", .op("spconv::indice_maxpool_backward_fp32",
&spconv::indiceMaxPoolBackward<float>) &spconv::indiceMaxPoolBackward<float>)
.op("spconv::indice_maxpool_half", &spconv::indiceMaxPool<at::Half>) .op("spconv::indice_maxpool_half", &spconv::indiceMaxPool<at::Half>)
.op("spconv::indice_maxpool_backward_half", .op("spconv::indice_maxpool_backward_half",
&spconv::indiceMaxPoolBackward<at::Half>); &spconv::indiceMaxPoolBackward<at::Half>)
\ No newline at end of file .op("spconv::nms", &spconv::nonMaxSuppression<float>);
\ No newline at end of file
// Copyright 2019 Yan Yan
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <boost/geometry.hpp>
#include <spconv/nms_functor.h>
#include <torch/script.h>
#include <vector>
namespace spconv {
namespace functor {
template <typename T, typename Index>
struct NonMaxSupressionFunctor<tv::CPU, T, Index> {
Index operator()(const tv::CPU &d, tv::TensorView<Index> keep,
tv::TensorView<const T> boxes, T threshold, T eps) {
auto ndets = boxes.dim(0);
auto suppressed = std::vector<Index>(ndets);
auto area = std::vector<T>(ndets);
for (int i = 0; i < ndets; ++i) {
area[i] =
(boxes(i, 2) - boxes(i, 0) + eps) * (boxes(i, 3) - boxes(i, 1) + eps);
}
int i, j;
T xx1, xx2, w, h, inter, ovr;
int keepNum = 0;
for (int _i = 0; _i < ndets; ++_i) {
i = _i;
if (suppressed[i] == 1)
continue;
keep[keepNum] = i;
keepNum += 1;
for (int _j = _i + 1; _j < ndets; ++_j) {
j = _j;
if (suppressed[j] == 1)
continue;
xx2 = std::min(boxes(i, 2), boxes(j, 2));
xx1 = std::max(boxes(i, 0), boxes(j, 0));
w = xx2 - xx1 + eps;
if (w > 0) {
xx2 = std::min(boxes(i, 3), boxes(j, 3));
xx1 = std::max(boxes(i, 1), boxes(j, 1));
h = xx2 - xx1 + eps;
if (h > 0) {
inter = w * h;
ovr = inter / (area[i] + area[j] - inter);
if (ovr >= threshold)
suppressed[j] = 1;
}
}
}
}
return keepNum;
}
};
template <typename T, typename Index>
struct rotateNonMaxSupressionFunctor<tv::CPU, T, Index> {
Index operator()(const tv::CPU &d, tv::TensorView<Index> keep,
tv::TensorView<const T> boxCorners,
tv::TensorView<const T> standupIoU, T threshold) {
auto ndets = boxCorners.dim(0);
auto suppressed = std::vector<Index>(ndets);
int i, j;
namespace bg = boost::geometry;
typedef bg::model::point<T, 2, bg::cs::cartesian> point_t;
typedef bg::model::polygon<point_t> polygon_t;
polygon_t poly, qpoly;
std::vector<polygon_t> poly_inter, poly_union;
T inter_area, union_area, overlap;
int keepNum = 0;
for (int _i = 0; _i < ndets; ++_i) {
i = _i;
if (suppressed[i] == 1)
continue;
keep[keepNum] = i;
keepNum += 1;
for (int _j = _i + 1; _j < ndets; ++_j) {
j = _j;
if (suppressed[j] == 1)
continue;
if (standupIoU(i, j) <= 0.0)
continue;
bg::append(poly, point_t(boxCorners(i, 0, 0), boxCorners(i, 0, 1)));
bg::append(poly, point_t(boxCorners(i, 1, 0), boxCorners(i, 1, 1)));
bg::append(poly, point_t(boxCorners(i, 2, 0), boxCorners(i, 2, 1)));
bg::append(poly, point_t(boxCorners(i, 3, 0), boxCorners(i, 3, 1)));
bg::append(poly, point_t(boxCorners(i, 0, 0), boxCorners(i, 0, 1)));
bg::append(qpoly, point_t(boxCorners(j, 0, 0), boxCorners(j, 0, 1)));
bg::append(qpoly, point_t(boxCorners(j, 1, 0), boxCorners(j, 1, 1)));
bg::append(qpoly, point_t(boxCorners(j, 2, 0), boxCorners(j, 2, 1)));
bg::append(qpoly, point_t(boxCorners(j, 3, 0), boxCorners(j, 3, 1)));
bg::append(qpoly, point_t(boxCorners(j, 0, 0), boxCorners(j, 0, 1)));
bg::intersection(poly, qpoly, poly_inter);
if (!poly_inter.empty()) {
inter_area = bg::area(poly_inter.front());
bg::union_(poly, qpoly, poly_union);
if (!poly_union.empty()) { // ignore invalid box
union_area = bg::area(poly_union.front());
overlap = inter_area / union_area;
if (overlap >= threshold)
suppressed[j] = 1;
poly_union.clear();
}
}
poly.clear();
qpoly.clear();
poly_inter.clear();
}
}
return keepNum;
}
};
} // namespace functor
#define DECLARE_CPU_T_INDEX(T, Index) \
template struct functor::NonMaxSupressionFunctor<tv::CPU, T, Index>; \
template struct functor::rotateNonMaxSupressionFunctor<tv::CPU, T, Index>;
#define DECLARE_CPU_INDEX(Index) \
DECLARE_CPU_T_INDEX(float, Index); \
DECLARE_CPU_T_INDEX(double, Index);
DECLARE_CPU_INDEX(int);
DECLARE_CPU_INDEX(long);
#undef DECLARE_CPU_INDEX
#undef DECLARE_CPU_T_INDEX
} // namespace spconv
// ------------------------------------------------------------------
// Deformable Convolutional Networks
// Copyright (c) 2015 Microsoft
// Licensed under The MIT License
// Modified from MATLAB Faster R-CNN (https://github.com/shaoqingren/faster_rcnn)
// ------------------------------------------------------------------
#include <ATen/ATen.h>
#include <chrono>
#include <limits>
#include <spconv/mp_helper.h>
#include <spconv/reordering.h>
#include <spconv/reordering.cu.h>
#include <tensorview/helper_kernel.cu.h>
#include <tensorview/helper_launch.h>
#include <tensorview/tensorview.h>
#include <type_traits>
#include <utility/timer.h>
#define DIVUP(m, n) ((m) / (n) + ((m) % (n) > 0))
int const threadsPerBlock = sizeof(unsigned long long) * 8;
template <typename DType>
__device__ inline DType devIoU(DType const *const a, DType const *const b)
{
DType left = max(a[0], b[0]), right = min(a[2], b[2]);
DType top = max(a[1], b[1]), bottom = min(a[3], b[3]);
DType width = max(right - left + 1, 0.f), height = max(bottom - top + 1, 0.f);
DType interS = width * height;
DType Sa = (a[2] - a[0] + 1) * (a[3] - a[1] + 1);
DType Sb = (b[2] - b[0] + 1) * (b[3] - b[1] + 1);
return interS / (Sa + Sb - interS);
}
template <typename DType, int BLOCK_THREADS>
__global__ void nms_kernel(const int n_boxes, const DType nms_overlap_thresh,
const DType *dev_boxes, unsigned long long *dev_mask)
{
const int row_start = blockIdx.y;
const int col_start = blockIdx.x;
// if (row_start > col_start) return;
const int row_size =
min(n_boxes - row_start * BLOCK_THREADS, BLOCK_THREADS);
const int col_size =
min(n_boxes - col_start * BLOCK_THREADS, BLOCK_THREADS);
__shared__ DType block_boxes[BLOCK_THREADS * 5];
if (threadIdx.x < col_size)
{
#pragma unroll
for (int i = 0; i < 5; ++i)
{
block_boxes[threadIdx.x * 5 + i] =
dev_boxes[(BLOCK_THREADS * col_start + threadIdx.x) * 5 + i];
}
}
__syncthreads();
if (threadIdx.x < row_size)
{
const int cur_box_idx = BLOCK_THREADS * row_start + threadIdx.x;
const DType *cur_box = dev_boxes + cur_box_idx * 5;
unsigned long long t = 0;
int start = 0;
if (row_start == col_start)
{
start = threadIdx.x + 1;
}
for (int i = start; i < col_size; i++)
{
if (devIoU(cur_box, block_boxes + i * 5) > nms_overlap_thresh)
{
t |= 1ULL << i;
}
}
const int col_blocks = DIVUP(n_boxes, BLOCK_THREADS);
dev_mask[cur_box_idx * col_blocks + col_start] = t;
}
}
...@@ -59,4 +59,29 @@ PYBIND11_MODULE(spconv_utils, m) ...@@ -59,4 +59,29 @@ PYBIND11_MODULE(spconv_utils, m)
"num_points_per_voxel"_a = 4, "coor_to_voxelidx"_a = 5, "num_points_per_voxel"_a = 4, "coor_to_voxelidx"_a = 5,
"voxel_size"_a = 6, "coors_range"_a = 7, "max_points"_a = 8, "voxel_size"_a = 6, "coors_range"_a = 7, "max_points"_a = 8,
"max_voxels"_a = 9); "max_voxels"_a = 9);
m.def("points_to_voxel_3d_np_mean", &spconv::points_to_voxel_3d_np_mean<float, 3>,
"matrix tensor_square", "points"_a = 1, "voxels"_a = 2, "means"_a = 3, "coors"_a = 4,
"num_points_per_voxel"_a = 5, "coor_to_voxelidx"_a = 6,
"voxel_size"_a = 7, "coors_range"_a = 8, "max_points"_a = 9,
"max_voxels"_a = 10);
m.def("points_to_voxel_3d_np_mean", &spconv::points_to_voxel_3d_np_mean<double, 3>,
"matrix tensor_square", "points"_a = 1, "voxels"_a = 2, "means"_a = 3, "coors"_a = 4,
"num_points_per_voxel"_a = 5, "coor_to_voxelidx"_a = 6,
"voxel_size"_a = 7, "coors_range"_a = 8, "max_points"_a = 9,
"max_voxels"_a = 10);
m.def("points_to_voxel_3d_np_height", &spconv::points_to_voxel_3d_np_height<double, 3>,
"matrix tensor_square", "points"_a = 1, "voxels"_a = 2, "heights"_a = 3,
"maxs"_a = 4, "coors"_a = 5, "num_points_per_voxel"_a = 6, "coor_to_voxelidx"_a = 7,
"voxel_size"_a = 8, "coors_range"_a = 9, "max_points"_a = 10,
"max_voxels"_a = 11);
m.def("points_to_voxel_3d_with_filtering", &spconv::points_to_voxel_3d_with_filtering<float, 3>,
"matrix tensor_square", "points"_a = 1, "voxels"_a = 2, "voxel_mask"_a = 3, "mins"_a = 4,
"maxs"_a = 5, "coors"_a = 6, "num_points_per_voxel"_a = 7, "coor_to_voxelidx"_a = 8,
"voxel_size"_a = 9, "coors_range"_a = 10, "max_points"_a = 11,
"max_voxels"_a = 12, "block_factor"_a = 13, "block_size"_a = 14, "height_threshold"_a = 15);
m.def("points_to_voxel_3d_with_filtering", &spconv::points_to_voxel_3d_with_filtering<double, 3>,
"matrix tensor_square", "points"_a = 1, "voxels"_a = 2, "voxel_mask"_a = 3, "mins"_a = 4,
"maxs"_a = 5, "coors"_a = 6, "num_points_per_voxel"_a = 7, "coor_to_voxelidx"_a = 8,
"voxel_size"_a = 9, "coors_range"_a = 10, "max_points"_a = 11,
"max_voxels"_a = 12, "block_factor"_a = 13, "block_size"_a = 14, "height_threshold"_a = 15);
} }
\ No newline at end of file
...@@ -20,7 +20,7 @@ import numpy as np ...@@ -20,7 +20,7 @@ import numpy as np
import time import time
from spconv.test_utils import params_grid, generate_sparse_data, TestCase from spconv.test_utils import params_grid, generate_sparse_data, TestCase
import unittest import unittest
import sparseconvnet as scn # import sparseconvnet as scn
class SparseConv3dTestTorch(nn.Module): class SparseConv3dTestTorch(nn.Module):
def __init__(self, num_layers, ndim, shape, in_channels, out_channels, kernel_size, def __init__(self, num_layers, ndim, shape, in_channels, out_channels, kernel_size,
...@@ -612,7 +612,7 @@ def main(): ...@@ -612,7 +612,7 @@ def main():
if __name__ == '__main__': if __name__ == '__main__':
# main() main()
unittest.main() # unittest.main()
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment