1. fix #11: torch master doesn't support bool as a jit operator argument.

2. fix SparseInverseConv problem 3. fix unittest problem

1. fix #11: torch master doesn't support bool as a jit operator argument.
2. fix SparseInverseConv problem 3. fix unittest problem
a09b086a · traveller59 · 8e7c4346 · a09b086a · a09b086a · a09b086a
Commit a09b086a authored Jan 23, 2019 by traveller59
Showing with 47 additions and 31 deletions

README.md README.md +1 -1

include/spconv/spconv_ops.h include/spconv/spconv_ops.h +27 -15

spconv/conv.py spconv/conv.py +5 -0

spconv/ops.py spconv/ops.py +6 -6

test/test_conv.py test/test_conv.py +8 -9

No files found.
--- a/README.md
+++ b/README.md
@@ -91,7 +91,7 @@ class ExampleNet(nn.Module):
        super().__init__()
        self.net = spconv.SparseSequential(
            spconv.SparseConv3d(32, 64, 3, 2, indice_key="cp0"),
-            spconv.SparseInverseConv3d(64, 32, indice_key="cp0"),
+            spconv.SparseInverseConv3d(64, 32, 3, indice_key="cp0"), # need provide kernel size to create weight
        )
        self.shape = shape

--- a/include/spconv/spconv_ops.h
+++ b/include/spconv/spconv_ops.h
@@ -30,8 +30,10 @@ getIndicePair(torch::Tensor indices, int64_t batchSize,
        std::vector<int64_t> outSpatialShape, std::vector<int64_t> spatialShape,
        std::vector<int64_t> kernelSize, std::vector<int64_t> stride,
        std::vector<int64_t> padding, std::vector<int64_t> dilation,
-        std::vector<int64_t> outPadding, bool subM, bool transpose) {
+        std::vector<int64_t> outPadding, int64_t _subM, int64_t _transpose) {
  // auto timer = spconv::CudaContextTimer<>();
+  bool subM = _subM != 0;
+  bool transpose = _transpose != 0;
  auto numAct = indices.size(0);
  auto coorDim = indices.size(1) - 1; // batchIdx + xyz
  TV_ASSERT_RT_ERR(NDim == coorDim, "error");
@@ -140,8 +142,10 @@ getIndicePairPreGrid(torch::Tensor indices, torch::Tensor gridOut, int64_t batch
        std::vector<int64_t> outSpatialShape, std::vector<int64_t> spatialShape,
        std::vector<int64_t> kernelSize, std::vector<int64_t> stride,
        std::vector<int64_t> padding, std::vector<int64_t> dilation,
-        std::vector<int64_t> outPadding, bool subM, bool transpose) {
+        std::vector<int64_t> outPadding, int64_t _subM, int64_t _transpose) {
  // auto timer = spconv::CudaContextTimer<>();
+  bool subM = _subM != 0;
+  bool transpose = _transpose != 0;
  auto numAct = indices.size(0);
  auto coorDim = indices.size(1) - 1; // batchIdx + xyz
  TV_ASSERT_RT_ERR(NDim == coorDim, "error");
@@ -249,7 +253,9 @@ getIndicePairPreGrid(torch::Tensor indices, torch::Tensor gridOut, int64_t batch
 template <typename T>
 torch::Tensor indiceConv(torch::Tensor features, torch::Tensor filters,
                       torch::Tensor indicePairs, torch::Tensor indiceNum,
-                       int64_t numActOut, bool inverse, bool subm) {
+                       int64_t numActOut, int64_t _inverse, int64_t _subM) {
+  bool subM = _subM != 0;
+  bool inverse = _inverse != 0;
  auto device = features.device().type();
  auto ndim = filters.dim() - 2;
  auto kernelVolume = indicePairs.size(0);
@@ -261,7 +267,7 @@ torch::Tensor indiceConv(torch::Tensor features, torch::Tensor filters,
  int indicePairMaxOffset = indicePairMaxSizeIter - indicePairNumCpu.data<int>();
  int indicePairMaxSize = *indicePairMaxSizeIter;
-  /*if (subm){
+  /*if (_subM){
    std::vector<int> indicePairNumVec(indicePairNumCpu.data<int>(), indicePairNumCpu.data<int>() + kernelVolume);
    indicePairNumVec.erase(indicePairNumVec.begin() + indicePairMaxOffset);
@@ -280,7 +286,7 @@ torch::Tensor indiceConv(torch::Tensor features, torch::Tensor filters,
  torch::Tensor outputBuffer =
      torch::zeros({indicePairMaxSize, numOutPlanes}, options);
  filters = filters.view({-1, numInPlanes, numOutPlanes});
-  if (subm) { // the center index of subm conv don't need gather and scatter
+  if (subM) { // the center index of subm conv don't need gather and scatter
              // add.
    torch::mm_out(output, features, filters[indicePairMaxOffset]);
  }
@@ -289,7 +295,7 @@ torch::Tensor indiceConv(torch::Tensor features, torch::Tensor filters,
  double totalSAddTime = 0;
  for (int i = 0; i < kernelVolume; ++i) {
    auto nHot = indicePairNumCpu.data<int>()[i];
-    if (nHot <= 0 || (subm && i == indicePairMaxOffset)) {
+    if (nHot <= 0 || (subM && i == indicePairMaxOffset)) {
      continue;
    }
    // auto timer = spconv::CudaContextTimer<>();
@@ -346,7 +352,10 @@ template <typename T>
 std::vector<torch::Tensor>
 indiceConvBackward(torch::Tensor features, torch::Tensor filters,
                 torch::Tensor outGrad, torch::Tensor indicePairs, torch::Tensor indiceNum,
-                 bool inverse, bool subm) {
+                 int64_t _inverse, int64_t _subM) {
+  bool subM = _subM != 0;
+  bool inverse = _inverse != 0;
  auto device = features.device().type();
  auto ndim = filters.dim() - 2;
  auto kernelVolume = indicePairs.size(0);
@@ -368,14 +377,14 @@ indiceConvBackward(torch::Tensor features, torch::Tensor filters,
  filters = filters.view({-1, numInPlanes, numOutPlanes});
  filtersGrad = filtersGrad.view({-1, numInPlanes, numOutPlanes});
-  if (subm) {
+  if (subM) {
    auto filterGradSub = filtersGrad[indicePairMaxOffset];
    torch::mm_out(filterGradSub, features.t(), outGrad);
    torch::mm_out(inputGrad, outGrad, filters[indicePairMaxOffset].t());
  }
  for (int i = 0; i < kernelVolume; ++i) {
    auto nHot = indicePairNumCpu.data<int>()[i];
-    if (nHot <= 0 || (subm && i == indicePairMaxOffset)) {
+    if (nHot <= 0 || (subM && i == indicePairMaxOffset)) {
      continue;
    }
    if (device == torch::kCPU) {
@@ -426,7 +435,10 @@ indiceConvBackward(torch::Tensor features, torch::Tensor filters,
 template <typename T>
 torch::Tensor indiceConvDevelopDontUse(torch::Tensor features, torch::Tensor filters,
                         torch::Tensor indicePairs, torch::Tensor indiceNum,
-                         int64_t numActOut, bool inverse, bool subm) {
+                         int64_t numActOut, int64_t _inverse, int64_t _subM) {
+  bool subM = _subM != 0;
+  bool inverse = _inverse != 0;
  auto device = features.device().type();
  auto ndim = filters.dim() - 2;
  auto kernelVolume = indicePairs.size(0);
@@ -443,7 +455,7 @@ torch::Tensor indiceConvDevelopDontUse(torch::Tensor features, torch::Tensor fil
                              indicePairNumCpu.data<int>() + kernelVolume);
  indicePairNumVec.erase(indicePairNumVec.begin() + indicePairMaxOffset);
  int subRuleMaxSize = *std::max_element(indicePairNumVec.begin(), indicePairNumVec.end());
-  if (subm) {
+  if (subM) {
    indicePairMaxSize = subRuleMaxSize;
  }
  auto timer = spconv::CudaContextTimer<>();
@@ -459,7 +471,7 @@ torch::Tensor indiceConvDevelopDontUse(torch::Tensor features, torch::Tensor fil
      torch::zeros({kernelVolume, indicePairMaxSize, numOutPlanes}, options);
  filters = filters.view({-1, numInPlanes, numOutPlanes});
  std::cout << "create time " << timer.report()/1000.0 << std::endl;
-  if (subm) { // the center index of subm conv don't need gather and scatter
+  if (subM) { // the center index of subm conv don't need gather and scatter
              // add.
    torch::mm_out(output, features, filters[indicePairMaxOffset]);
  }
@@ -469,7 +481,7 @@ torch::Tensor indiceConvDevelopDontUse(torch::Tensor features, torch::Tensor fil
  // auto timer = spconv::CudaContextTimer<>();
  for (int i = 0; i < kernelVolume; ++i) {
    auto nHot = indicePairNumCpu.data<int>()[i];
-    if (nHot <= 0 || (subm && i == indicePairMaxOffset)) {
+    if (nHot <= 0 || (subM && i == indicePairMaxOffset)) {
      continue;
    }
    // 
@@ -499,7 +511,7 @@ torch::Tensor indiceConvDevelopDontUse(torch::Tensor features, torch::Tensor fil
  // totalGatherTime += timer.report() / 1000.0;
  for (int i = 0; i < kernelVolume; ++i) {
    auto nHot = indicePairNumCpu.data<int>()[i];
-    if (nHot <= 0 || (subm && i == indicePairMaxOffset)) {
+    if (nHot <= 0 || (subM && i == indicePairMaxOffset)) {
      continue;
    }
    auto outputBufferBlob = torch::from_blob(outputBuffer[i].data<T>(),
@@ -513,7 +525,7 @@ torch::Tensor indiceConvDevelopDontUse(torch::Tensor features, torch::Tensor fil
  // totalGEMMTime += timer.report() / 1000.0;
  for (int i = 0; i < kernelVolume; ++i) {
    auto nHot = indicePairNumCpu.data<int>()[i];
-    if (nHot <= 0 || (subm && i == indicePairMaxOffset)) {
+    if (nHot <= 0 || (subM && i == indicePairMaxOffset)) {
      continue;
    }
    auto outputBufferBlob = torch::from_blob(outputBuffer[i].data<T>(),

--- a/spconv/conv.py
+++ b/spconv/conv.py
@@ -141,6 +141,7 @@ class SparseConvolution(SparseModule):
        if self.inverse:
            assert datas is not None and self.indice_key is not None
            _, outids, indice_pairs, indice_pair_num, out_spatial_shape = datas
+            assert indice_pairs.shape[0] == np.prod(self.kernel_size), "inverse conv must have same kernel size as its couple conv"
        else:
            if self.indice_key is not None and datas is not None:
                outids, _, indice_pairs, indice_pair_num, _ = datas
@@ -274,12 +275,14 @@ class SparseInverseConv2d(SparseConvolution):
    def __init__(self,
                 in_channels,
                 out_channels,
+                 kernel_size,
                 indice_key,
                 bias=True):
        super(SparseInverseConv2d, self).__init__(
            2,
            in_channels,
            out_channels,
+            kernel_size,
            bias=bias,
            inverse=True,
            indice_key=indice_key)
@@ -289,12 +292,14 @@ class SparseInverseConv3d(SparseConvolution):
    def __init__(self,
                 in_channels,
                 out_channels,
+                 kernel_size,
                 indice_key,
                 bias=True):
        super(SparseInverseConv3d, self).__init__(
            3,
            in_channels,
            out_channels,
+            kernel_size,
            bias=bias,
            inverse=True,
            indice_key=indice_key)

--- a/spconv/ops.py
+++ b/spconv/ops.py
@@ -86,7 +86,7 @@ def get_indice_pairs(indices,
        else:
            raise NotImplementedError
        return get_indice_pairs_func(indices, batch_size, out_shape, spatial_shape, ksize,
-                            stride, padding, dilation, out_padding, subm, transpose)
+                            stride, padding, dilation, out_padding, int(subm), int(transpose))
    else:
        if ndim == 2:
            get_indice_pairs_func = torch.ops.spconv.get_indice_pairs_grid_2d
@@ -95,7 +95,7 @@ def get_indice_pairs(indices,
        else:
            raise NotImplementedError
        return get_indice_pairs_func(indices, grid, batch_size, out_shape, spatial_shape, ksize,
-                            stride, padding, dilation, out_padding, subm, transpose)
+                            stride, padding, dilation, out_padding, int(subm), int(transpose))
@@ -109,11 +109,11 @@ def indice_conv(features,
    if filters.dtype == torch.float32:
        return torch.ops.spconv.indice_conv_fp32(features, filters, indice_pairs,
                                               indice_pair_num, num_activate_out,
-                                               inverse, subm)
+                                               int(inverse), int(subm))
    elif filters.dtype == torch.half:
        return torch.ops.spconv.indice_conv_half(features, filters, indice_pairs,
                                               indice_pair_num, num_activate_out,
-                                               inverse, subm)
+                                               int(inverse), int(subm))
    else:
        raise NotImplementedError
@@ -127,10 +127,10 @@ def indice_conv_backward(features,
                       subm=False):
    if filters.dtype == torch.float32:
        return torch.ops.spconv.indice_conv_backward_fp32(
-            features, filters, out_bp, indice_pairs, indice_pair_num, inverse, subm)
+            features, filters, out_bp, indice_pairs, indice_pair_num, int(inverse), int(subm))
    elif filters.dtype == torch.half:
        return torch.ops.spconv.indice_conv_backward_half(
-            features, filters, out_bp, indice_pairs, indice_pair_num, inverse, subm)
+            features, filters, out_bp, indice_pairs, indice_pair_num, int(inverse), int(subm))
    else:
        raise NotImplementedError

--- a/test/test_conv.py
+++ b/test/test_conv.py
@@ -20,7 +20,7 @@ import numpy as np
 import time 
 from spconv.test_utils import params_grid, generate_sparse_data, TestCase
 import unittest
-# import sparseconvnet as scn 
+import sparseconvnet as scn 
 class SparseConv3dTestTorch(nn.Module):
    def __init__(self, num_layers, ndim, shape, in_channels, out_channels, kernel_size,
@@ -278,11 +278,10 @@ class SparseCoupleDeConvTest(nn.Module):
                stride,
                indice_key="cp0",
                bias=False),
-            spconv.SparseConvCoupleTranspose3d(
+            spconv.SparseInverseConv3d(
                out_channels,
                in_channels,
                kernel_size,
-                stride,
                indice_key="cp0",
                bias=False),
@@ -361,7 +360,7 @@ class TestSpConv(TestCase):
            net_ref.net[0].weight.data[:] = filters_t.permute(4, 3, 0, 1, 2).contiguous()
            net.net[0].weight.data[:] = filters_t
            out_ref = net_ref(features_dense_t)
-            out = net(features_t, indices_t, bs).dense().permute(0, 4, 1, 2, 3).contiguous()
+            out = net(features_t, indices_t, bs).dense()
            dout = np.random.uniform(-0.2, 0.2, out_ref.shape).astype(features.dtype)
            dout_t = torch.from_numpy(dout).to(device)
            out.backward(dout_t)
@@ -420,7 +419,7 @@ class TestSpConv(TestCase):
            net_ref.net[0].weight.data[:] = filters_t.permute(3, 4, 0, 1, 2).contiguous()
            net.net[0].weight.data[:] = filters_t
            out_ref = net_ref(features_dense_t)
-            out = net(features_t, indices_t, bs).dense().permute(0, 4, 1, 2, 3).contiguous()
+            out = net(features_t, indices_t, bs).dense()
            dout = np.random.uniform(-0.2, 0.2, out_ref.shape).astype(features.dtype)
            dout_t = torch.from_numpy(dout).to(device)
            out.backward(dout_t)
@@ -500,7 +499,7 @@ class TestSpConv(TestCase):
    def testSpMaxPool3d(self):
-        np.random.seed(484)
+        np.random.seed(485)
        devices = ["cuda:0", "cpu:0"]
        shapes = [[19, 18, 17]]
        batchsizes = [1, 2]
@@ -538,7 +537,7 @@ class TestSpConv(TestCase):
            out = net(features_t, indices_t, bs)
            outids = out.indices
            outfeatures = out.features
-            out_dense = out.dense()
+            out_dense = out.dense(channels_first=False)
            out = out_dense.permute(0, 4, 1, 2, 3).contiguous()
            dout_sparse = np.random.uniform(-0.2, 0.2, outfeatures.shape).astype(features.dtype)
@@ -605,10 +604,10 @@ def main():
            out = net(features_t, indices_t, bs)
            torch.cuda.synchronize()
            times.append(time.time() - t)
-        print((net.grid == -1).float().sum(), net.grid.numel())
+        # print((net.grid == -1).float().sum(), net.grid.numel())
            # print("spconv time", time.time() - t)
        print("spconv time", np.mean(times[2:]))
-        out = net(features_t, indices_t, bs).dense().permute(0, 4, 1, 2, 3).contiguous()
+        out = net(features_t, indices_t, bs).dense()
        print(np.linalg.norm(out.detach().cpu().numpy() - out_ref.detach().cpu().numpy()))