Commit a09b086a authored by traveller59
Browse files

1. fix #11: torch master doesn't support bool as a JIT operator argument.

2. fix SparseInverseConv problem
3. fix unittest problem
parent 8e7c4346
......@@ -91,7 +91,7 @@ class ExampleNet(nn.Module):
super().__init__()
self.net = spconv.SparseSequential(
spconv.SparseConv3d(32, 64, 3, 2, indice_key="cp0"),
spconv.SparseInverseConv3d(64, 32, indice_key="cp0"),
spconv.SparseInverseConv3d(64, 32, 3, indice_key="cp0"), # kernel size must be provided so the weight can be created
)
self.shape = shape
......
......@@ -30,8 +30,10 @@ getIndicePair(torch::Tensor indices, int64_t batchSize,
std::vector<int64_t> outSpatialShape, std::vector<int64_t> spatialShape,
std::vector<int64_t> kernelSize, std::vector<int64_t> stride,
std::vector<int64_t> padding, std::vector<int64_t> dilation,
std::vector<int64_t> outPadding, bool subM, bool transpose) {
std::vector<int64_t> outPadding, int64_t _subM, int64_t _transpose) {
// auto timer = spconv::CudaContextTimer<>();
bool subM = _subM != 0;
bool transpose = _transpose != 0;
auto numAct = indices.size(0);
auto coorDim = indices.size(1) - 1; // batchIdx + xyz
TV_ASSERT_RT_ERR(NDim == coorDim, "error");
......@@ -140,8 +142,10 @@ getIndicePairPreGrid(torch::Tensor indices, torch::Tensor gridOut, int64_t batch
std::vector<int64_t> outSpatialShape, std::vector<int64_t> spatialShape,
std::vector<int64_t> kernelSize, std::vector<int64_t> stride,
std::vector<int64_t> padding, std::vector<int64_t> dilation,
std::vector<int64_t> outPadding, bool subM, bool transpose) {
std::vector<int64_t> outPadding, int64_t _subM, int64_t _transpose) {
// auto timer = spconv::CudaContextTimer<>();
bool subM = _subM != 0;
bool transpose = _transpose != 0;
auto numAct = indices.size(0);
auto coorDim = indices.size(1) - 1; // batchIdx + xyz
TV_ASSERT_RT_ERR(NDim == coorDim, "error");
......@@ -249,7 +253,9 @@ getIndicePairPreGrid(torch::Tensor indices, torch::Tensor gridOut, int64_t batch
template <typename T>
torch::Tensor indiceConv(torch::Tensor features, torch::Tensor filters,
torch::Tensor indicePairs, torch::Tensor indiceNum,
int64_t numActOut, bool inverse, bool subm) {
int64_t numActOut, int64_t _inverse, int64_t _subM) {
bool subM = _subM != 0;
bool inverse = _inverse != 0;
auto device = features.device().type();
auto ndim = filters.dim() - 2;
auto kernelVolume = indicePairs.size(0);
......@@ -261,7 +267,7 @@ torch::Tensor indiceConv(torch::Tensor features, torch::Tensor filters,
int indicePairMaxOffset = indicePairMaxSizeIter - indicePairNumCpu.data<int>();
int indicePairMaxSize = *indicePairMaxSizeIter;
/*if (subm){
/*if (_subM){
std::vector<int> indicePairNumVec(indicePairNumCpu.data<int>(), indicePairNumCpu.data<int>() + kernelVolume);
indicePairNumVec.erase(indicePairNumVec.begin() + indicePairMaxOffset);
......@@ -280,7 +286,7 @@ torch::Tensor indiceConv(torch::Tensor features, torch::Tensor filters,
torch::Tensor outputBuffer =
torch::zeros({indicePairMaxSize, numOutPlanes}, options);
filters = filters.view({-1, numInPlanes, numOutPlanes});
if (subm) { // the center index of subm conv don't need gather and scatter
if (subM) { // the center index of subm conv don't need gather and scatter
// add.
torch::mm_out(output, features, filters[indicePairMaxOffset]);
}
......@@ -289,7 +295,7 @@ torch::Tensor indiceConv(torch::Tensor features, torch::Tensor filters,
double totalSAddTime = 0;
for (int i = 0; i < kernelVolume; ++i) {
auto nHot = indicePairNumCpu.data<int>()[i];
if (nHot <= 0 || (subm && i == indicePairMaxOffset)) {
if (nHot <= 0 || (subM && i == indicePairMaxOffset)) {
continue;
}
// auto timer = spconv::CudaContextTimer<>();
......@@ -346,7 +352,10 @@ template <typename T>
std::vector<torch::Tensor>
indiceConvBackward(torch::Tensor features, torch::Tensor filters,
torch::Tensor outGrad, torch::Tensor indicePairs, torch::Tensor indiceNum,
bool inverse, bool subm) {
int64_t _inverse, int64_t _subM) {
bool subM = _subM != 0;
bool inverse = _inverse != 0;
auto device = features.device().type();
auto ndim = filters.dim() - 2;
auto kernelVolume = indicePairs.size(0);
......@@ -368,14 +377,14 @@ indiceConvBackward(torch::Tensor features, torch::Tensor filters,
filters = filters.view({-1, numInPlanes, numOutPlanes});
filtersGrad = filtersGrad.view({-1, numInPlanes, numOutPlanes});
if (subm) {
if (subM) {
auto filterGradSub = filtersGrad[indicePairMaxOffset];
torch::mm_out(filterGradSub, features.t(), outGrad);
torch::mm_out(inputGrad, outGrad, filters[indicePairMaxOffset].t());
}
for (int i = 0; i < kernelVolume; ++i) {
auto nHot = indicePairNumCpu.data<int>()[i];
if (nHot <= 0 || (subm && i == indicePairMaxOffset)) {
if (nHot <= 0 || (subM && i == indicePairMaxOffset)) {
continue;
}
if (device == torch::kCPU) {
......@@ -426,7 +435,10 @@ indiceConvBackward(torch::Tensor features, torch::Tensor filters,
template <typename T>
torch::Tensor indiceConvDevelopDontUse(torch::Tensor features, torch::Tensor filters,
torch::Tensor indicePairs, torch::Tensor indiceNum,
int64_t numActOut, bool inverse, bool subm) {
int64_t numActOut, int64_t _inverse, int64_t _subM) {
bool subM = _subM != 0;
bool inverse = _inverse != 0;
auto device = features.device().type();
auto ndim = filters.dim() - 2;
auto kernelVolume = indicePairs.size(0);
......@@ -443,7 +455,7 @@ torch::Tensor indiceConvDevelopDontUse(torch::Tensor features, torch::Tensor fil
indicePairNumCpu.data<int>() + kernelVolume);
indicePairNumVec.erase(indicePairNumVec.begin() + indicePairMaxOffset);
int subRuleMaxSize = *std::max_element(indicePairNumVec.begin(), indicePairNumVec.end());
if (subm) {
if (subM) {
indicePairMaxSize = subRuleMaxSize;
}
auto timer = spconv::CudaContextTimer<>();
......@@ -459,7 +471,7 @@ torch::Tensor indiceConvDevelopDontUse(torch::Tensor features, torch::Tensor fil
torch::zeros({kernelVolume, indicePairMaxSize, numOutPlanes}, options);
filters = filters.view({-1, numInPlanes, numOutPlanes});
std::cout << "create time " << timer.report()/1000.0 << std::endl;
if (subm) { // the center index of subm conv don't need gather and scatter
if (subM) { // the center index of subm conv don't need gather and scatter
// add.
torch::mm_out(output, features, filters[indicePairMaxOffset]);
}
......@@ -469,7 +481,7 @@ torch::Tensor indiceConvDevelopDontUse(torch::Tensor features, torch::Tensor fil
// auto timer = spconv::CudaContextTimer<>();
for (int i = 0; i < kernelVolume; ++i) {
auto nHot = indicePairNumCpu.data<int>()[i];
if (nHot <= 0 || (subm && i == indicePairMaxOffset)) {
if (nHot <= 0 || (subM && i == indicePairMaxOffset)) {
continue;
}
//
......@@ -499,7 +511,7 @@ torch::Tensor indiceConvDevelopDontUse(torch::Tensor features, torch::Tensor fil
// totalGatherTime += timer.report() / 1000.0;
for (int i = 0; i < kernelVolume; ++i) {
auto nHot = indicePairNumCpu.data<int>()[i];
if (nHot <= 0 || (subm && i == indicePairMaxOffset)) {
if (nHot <= 0 || (subM && i == indicePairMaxOffset)) {
continue;
}
auto outputBufferBlob = torch::from_blob(outputBuffer[i].data<T>(),
......@@ -513,7 +525,7 @@ torch::Tensor indiceConvDevelopDontUse(torch::Tensor features, torch::Tensor fil
// totalGEMMTime += timer.report() / 1000.0;
for (int i = 0; i < kernelVolume; ++i) {
auto nHot = indicePairNumCpu.data<int>()[i];
if (nHot <= 0 || (subm && i == indicePairMaxOffset)) {
if (nHot <= 0 || (subM && i == indicePairMaxOffset)) {
continue;
}
auto outputBufferBlob = torch::from_blob(outputBuffer[i].data<T>(),
......
......@@ -141,6 +141,7 @@ class SparseConvolution(SparseModule):
if self.inverse:
assert datas is not None and self.indice_key is not None
_, outids, indice_pairs, indice_pair_num, out_spatial_shape = datas
assert indice_pairs.shape[0] == np.prod(self.kernel_size), "inverse conv must have same kernel size as its couple conv"
else:
if self.indice_key is not None and datas is not None:
outids, _, indice_pairs, indice_pair_num, _ = datas
......@@ -274,12 +275,14 @@ class SparseInverseConv2d(SparseConvolution):
def __init__(self,
in_channels,
out_channels,
kernel_size,
indice_key,
bias=True):
super(SparseInverseConv2d, self).__init__(
2,
in_channels,
out_channels,
kernel_size,
bias=bias,
inverse=True,
indice_key=indice_key)
......@@ -289,12 +292,14 @@ class SparseInverseConv3d(SparseConvolution):
def __init__(self,
in_channels,
out_channels,
kernel_size,
indice_key,
bias=True):
super(SparseInverseConv3d, self).__init__(
3,
in_channels,
out_channels,
kernel_size,
bias=bias,
inverse=True,
indice_key=indice_key)
......
......@@ -86,7 +86,7 @@ def get_indice_pairs(indices,
else:
raise NotImplementedError
return get_indice_pairs_func(indices, batch_size, out_shape, spatial_shape, ksize,
stride, padding, dilation, out_padding, subm, transpose)
stride, padding, dilation, out_padding, int(subm), int(transpose))
else:
if ndim == 2:
get_indice_pairs_func = torch.ops.spconv.get_indice_pairs_grid_2d
......@@ -95,7 +95,7 @@ def get_indice_pairs(indices,
else:
raise NotImplementedError
return get_indice_pairs_func(indices, grid, batch_size, out_shape, spatial_shape, ksize,
stride, padding, dilation, out_padding, subm, transpose)
stride, padding, dilation, out_padding, int(subm), int(transpose))
......@@ -109,11 +109,11 @@ def indice_conv(features,
if filters.dtype == torch.float32:
return torch.ops.spconv.indice_conv_fp32(features, filters, indice_pairs,
indice_pair_num, num_activate_out,
inverse, subm)
int(inverse), int(subm))
elif filters.dtype == torch.half:
return torch.ops.spconv.indice_conv_half(features, filters, indice_pairs,
indice_pair_num, num_activate_out,
inverse, subm)
int(inverse), int(subm))
else:
raise NotImplementedError
......@@ -127,10 +127,10 @@ def indice_conv_backward(features,
subm=False):
if filters.dtype == torch.float32:
return torch.ops.spconv.indice_conv_backward_fp32(
features, filters, out_bp, indice_pairs, indice_pair_num, inverse, subm)
features, filters, out_bp, indice_pairs, indice_pair_num, int(inverse), int(subm))
elif filters.dtype == torch.half:
return torch.ops.spconv.indice_conv_backward_half(
features, filters, out_bp, indice_pairs, indice_pair_num, inverse, subm)
features, filters, out_bp, indice_pairs, indice_pair_num, int(inverse), int(subm))
else:
raise NotImplementedError
......
......@@ -20,7 +20,7 @@ import numpy as np
import time
from spconv.test_utils import params_grid, generate_sparse_data, TestCase
import unittest
# import sparseconvnet as scn
import sparseconvnet as scn
class SparseConv3dTestTorch(nn.Module):
def __init__(self, num_layers, ndim, shape, in_channels, out_channels, kernel_size,
......@@ -278,11 +278,10 @@ class SparseCoupleDeConvTest(nn.Module):
stride,
indice_key="cp0",
bias=False),
spconv.SparseConvCoupleTranspose3d(
spconv.SparseInverseConv3d(
out_channels,
in_channels,
kernel_size,
stride,
indice_key="cp0",
bias=False),
......@@ -361,7 +360,7 @@ class TestSpConv(TestCase):
net_ref.net[0].weight.data[:] = filters_t.permute(4, 3, 0, 1, 2).contiguous()
net.net[0].weight.data[:] = filters_t
out_ref = net_ref(features_dense_t)
out = net(features_t, indices_t, bs).dense().permute(0, 4, 1, 2, 3).contiguous()
out = net(features_t, indices_t, bs).dense()
dout = np.random.uniform(-0.2, 0.2, out_ref.shape).astype(features.dtype)
dout_t = torch.from_numpy(dout).to(device)
out.backward(dout_t)
......@@ -420,7 +419,7 @@ class TestSpConv(TestCase):
net_ref.net[0].weight.data[:] = filters_t.permute(3, 4, 0, 1, 2).contiguous()
net.net[0].weight.data[:] = filters_t
out_ref = net_ref(features_dense_t)
out = net(features_t, indices_t, bs).dense().permute(0, 4, 1, 2, 3).contiguous()
out = net(features_t, indices_t, bs).dense()
dout = np.random.uniform(-0.2, 0.2, out_ref.shape).astype(features.dtype)
dout_t = torch.from_numpy(dout).to(device)
out.backward(dout_t)
......@@ -500,7 +499,7 @@ class TestSpConv(TestCase):
def testSpMaxPool3d(self):
np.random.seed(484)
np.random.seed(485)
devices = ["cuda:0", "cpu:0"]
shapes = [[19, 18, 17]]
batchsizes = [1, 2]
......@@ -538,7 +537,7 @@ class TestSpConv(TestCase):
out = net(features_t, indices_t, bs)
outids = out.indices
outfeatures = out.features
out_dense = out.dense()
out_dense = out.dense(channels_first=False)
out = out_dense.permute(0, 4, 1, 2, 3).contiguous()
dout_sparse = np.random.uniform(-0.2, 0.2, outfeatures.shape).astype(features.dtype)
......@@ -605,10 +604,10 @@ def main():
out = net(features_t, indices_t, bs)
torch.cuda.synchronize()
times.append(time.time() - t)
print((net.grid == -1).float().sum(), net.grid.numel())
# print((net.grid == -1).float().sum(), net.grid.numel())
# print("spconv time", time.time() - t)
print("spconv time", np.mean(times[2:]))
out = net(features_t, indices_t, bs).dense().permute(0, 4, 1, 2, 3).contiguous()
out = net(features_t, indices_t, bs).dense()
print(np.linalg.norm(out.detach().cpu().numpy() - out_ref.detach().cpu().numpy()))
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment