add Minkowski conv kernel

cfaa1a3a · yanyan · 9ce18407 · cfaa1a3a · cfaa1a3a · cfaa1a3a
Commit cfaa1a3a authored Jun 16, 2020 by yanyan
7 changed files
--- a/src/spconv/pillar_scatter.cu
+++ b/src/spconv/pillar_scatter.cu
-// Copyright 2019 Yan Yan
+// Copyright 2019-2020 Yan Yan
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.

--- a/src/spconv/reordering.cc
+++ b/src/spconv/reordering.cc
-// Copyright 2019 Yan Yan
+// Copyright 2019-2020 Yan Yan
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.

--- a/src/spconv/reordering.cu
+++ b/src/spconv/reordering.cu
-// Copyright 2019 Yan Yan
+// Copyright 2019-2020 Yan Yan
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.

--- a/src/spconv/spconv_ops.cc
+++ b/src/spconv/spconv_ops.cc
@@ -246,8 +246,9 @@ torch::Tensor indiceConvNative(torch::Tensor features, torch::Tensor filters,
  return output;
 }

+template <int Algo>
 torch::Tensor
-indiceConvSparseConvNet(torch::Tensor features, torch::Tensor filters,
+indiceConvFused(torch::Tensor features, torch::Tensor filters,
                        torch::Tensor indicePairs, torch::Tensor indiceNum,
                        int64_t numActOut, int64_t _inverse, int64_t _subM) {
  auto kernelVolume = indiceNum.size(0);
@@ -277,11 +278,11 @@ indiceConvSparseConvNet(torch::Tensor features, torch::Tensor filters,
      continue;
    }
    if (device == torch::kCPU) {
-      TV_THROW_INVALID_ARG("SparseConvNet only support gpu");
+      TV_THROW_INVALID_ARG("fused only support gpu");
    }
 #ifdef TV_CUDA
    else if (device == torch::kCUDA) {
-      fused_conv_cuda(output, features, filters[i], indicePairs[inverse][i],
+      FusedConvDispatch<Algo>::fwd(output, features, filters[i], indicePairs[inverse][i],
                      indicePairs[!inverse][i], nHot);
    }
 #endif
@@ -421,37 +422,6 @@ torch::Tensor indiceConvBatch(torch::Tensor features, torch::Tensor filters,
  return output;
 }

-template <int Algo> struct ConvDispatch;
-
-template <> struct ConvDispatch<kNative> {
-  constexpr static auto *func = indiceConvNative;
-};
-
-template <> struct ConvDispatch<kBatch> {
-  constexpr static auto *func = indiceConvBatch<false>;
-};
-
-template <> struct ConvDispatch<kBatchGemmGather> {
-  constexpr static auto *func = indiceConvBatch<true>;
-};
-
-template <> struct ConvDispatch<kSparseConvNet> {
-  constexpr static auto *func = indiceConvSparseConvNet;
-};
-
-torch::Tensor indiceConv(torch::Tensor features, torch::Tensor filters,
-                         torch::Tensor indicePairs, torch::Tensor indiceNum,
-                         int64_t numActOut, int64_t _inverse, int64_t _subM,
-                         int64_t algo) {
-  torch::Tensor res;
-  tv::DispatchInt<all_conv_algos_t>()(algo, [&](auto I) {
-    constexpr int AlgoValue = decltype(I)::value;
-    res = ConvDispatch<AlgoValue>::func(features, filters, indicePairs,
-                                        indiceNum, numActOut, _inverse, _subM);
-  });
-  return res;
-}
-
 std::vector<torch::Tensor>
 indiceConvBwNative(torch::Tensor features, torch::Tensor filters,
                   torch::Tensor outGrad, torch::Tensor indicePairs,
@@ -544,8 +514,9 @@ indiceConvBwNative(torch::Tensor features, torch::Tensor filters,
  return {inputGrad, filtersGrad.view(filterShape)};
 }

+template <int Algo>
 std::vector<torch::Tensor>
-indiceConvBwSparseConvNet(torch::Tensor features, torch::Tensor filters,
+indiceConvBwFused(torch::Tensor features, torch::Tensor filters,
                          torch::Tensor outGrad, torch::Tensor indicePairs,
                          torch::Tensor indiceNum, int64_t _inverse,
                          int64_t _subM) {
@@ -585,7 +556,7 @@ indiceConvBwSparseConvNet(torch::Tensor features, torch::Tensor filters,
    }
 #ifdef TV_CUDA
    else if (device == torch::kCUDA) {
-      fused_conv_backward_cuda(features, inputGrad, outGrad, filters[i],
+      FusedConvDispatch<Algo>::bwd(features, inputGrad, outGrad, filters[i],
                               filtersGrad[i], indicePairs[inverse][i],
                               indicePairs[!inverse][i], nHot);
    }
@@ -725,24 +696,47 @@ indiceConvBwBatch(torch::Tensor features, torch::Tensor filters,
  return {inputGrad, filtersGrad.view(filterShape)};
 }

-template <int Algo> struct ConvBwDispatch;
+template <int Algo> struct ConvDispatch;

-template <> struct ConvBwDispatch<kNative> {
-  constexpr static auto *func = indiceConvBwNative;
+template <> struct ConvDispatch<kNative> {
+  constexpr static auto *fwd = indiceConvNative;
+  constexpr static auto *bwd = indiceConvBwNative;
 };

-template <> struct ConvBwDispatch<kBatch> {
-  constexpr static auto *func = indiceConvBwBatch<false>;
+template <> struct ConvDispatch<kBatch> {
+  constexpr static auto *fwd = indiceConvBatch<false>;
+  constexpr static auto *bwd = indiceConvBwBatch<false>;
 };

-template <> struct ConvBwDispatch<kBatchGemmGather> {
-  constexpr static auto *func = indiceConvBwBatch<true>;
+template <> struct ConvDispatch<kBatchGemmGather> {
+  constexpr static auto *fwd = indiceConvBatch<true>;
+  constexpr static auto *bwd = indiceConvBwBatch<true>;
 };

-template <> struct ConvBwDispatch<kSparseConvNet> {
-  constexpr static auto *func = indiceConvBwSparseConvNet;
+template <> struct ConvDispatch<kSparseConvNet> {
+  constexpr static auto *fwd = indiceConvFused<kFSparseConvNet>;
+  constexpr static auto *bwd = indiceConvBwFused<kFSparseConvNet>;
 };

+template <> struct ConvDispatch<kMinkowskiEngine> {
+  constexpr static auto *fwd = indiceConvFused<kFMinkowskiEngine>;
+  constexpr static auto *bwd = indiceConvBwFused<kFMinkowskiEngine>;
+};
+
+
+torch::Tensor indiceConv(torch::Tensor features, torch::Tensor filters,
+                         torch::Tensor indicePairs, torch::Tensor indiceNum,
+                         int64_t numActOut, int64_t _inverse, int64_t _subM,
+                         int64_t algo) {
+  torch::Tensor res;
+  tv::DispatchInt<all_conv_algos_t>()(algo, [&](auto I) {
+    constexpr int AlgoValue = decltype(I)::value;
+    res = ConvDispatch<AlgoValue>::fwd(features, filters, indicePairs,
+                                       indiceNum, numActOut, _inverse, _subM);
+  });
+  return res;
+}
+
 std::vector<torch::Tensor>
 indiceConvBackward(torch::Tensor features, torch::Tensor filters,
                   torch::Tensor outGrad, torch::Tensor indicePairs,
@@ -751,8 +745,8 @@ indiceConvBackward(torch::Tensor features, torch::Tensor filters,
  std::vector<torch::Tensor> res;
  tv::DispatchInt<all_conv_algos_t>()(algo, [&](auto I) {
    constexpr int AlgoValue = decltype(I)::value;
-    res = ConvBwDispatch<AlgoValue>::func(
-        features, filters, outGrad, indicePairs, indiceNum, _inverse, _subM);
+    res = ConvDispatch<AlgoValue>::bwd(features, filters, outGrad, indicePairs,
+                                       indiceNum, _inverse, _subM);
  });
  return res;
 }

--- a/src/utils/all.cc
+++ b/src/utils/all.cc
-// Copyright 2019 Yan Yan
+// Copyright 2019-2020 Yan Yan
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.

--- a/test/benchmark.py
+++ b/test/benchmark.py
@@ -23,41 +23,41 @@ def waymo_data(batch_size=1):


 class Net(nn.Module):
-    def __init__(self, shape):
+    def __init__(self, shape, algo):
        super().__init__()
        self.net = spconv.SparseSequential(
-            spconv.SubMConv3d(3, 64, 3, bias=False, indice_key="c0"),
-            spconv.SubMConv3d(64, 64, 3, bias=False, indice_key="c0"),
+            spconv.SubMConv3d(3, 64, 3, bias=False, indice_key="c0", algo=algo),
+            spconv.SubMConv3d(64, 64, 3, bias=False, indice_key="c0", algo=algo),
            # nn.BatchNorm1d(32),
            # nn.ReLU(),
            spconv.SparseMaxPool3d(2, 2),
-            spconv.SubMConv3d(64, 96, 3, bias=False, indice_key="c1"),
-            spconv.SubMConv3d(96, 96, 3, bias=False, indice_key="c1"),
+            spconv.SubMConv3d(64, 96, 3, bias=False, indice_key="c1", algo=algo),
+            spconv.SubMConv3d(96, 96, 3, bias=False, indice_key="c1", algo=algo),
            # nn.BatchNorm1d(64),
            # nn.ReLU(),
            spconv.SparseMaxPool3d(2, 2),
-            spconv.SubMConv3d(96, 128, 3, bias=False, indice_key="c2"),
-            spconv.SubMConv3d(128, 128, 3, bias=False, indice_key="c2"),
+            spconv.SubMConv3d(96, 128, 3, bias=False, indice_key="c2", algo=algo),
+            spconv.SubMConv3d(128, 128, 3, bias=False, indice_key="c2", algo=algo),
            # nn.BatchNorm1d(128),
            # nn.ReLU(),
            spconv.SparseMaxPool3d(2, 2),
-            spconv.SubMConv3d(128, 160, 3, bias=False, indice_key="c3"),
-            spconv.SubMConv3d(160, 160, 3, bias=False, indice_key="c3"),
+            spconv.SubMConv3d(128, 160, 3, bias=False, indice_key="c3", algo=algo),
+            spconv.SubMConv3d(160, 160, 3, bias=False, indice_key="c3", algo=algo),
            # nn.BatchNorm1d(128),
            # nn.ReLU(),
            spconv.SparseMaxPool3d(2, 2),
-            spconv.SubMConv3d(160, 192, 3, bias=False, indice_key="c4"),
-            spconv.SubMConv3d(192, 192, 3, bias=False, indice_key="c4"),
+            spconv.SubMConv3d(160, 192, 3, bias=False, indice_key="c4", algo=algo),
+            spconv.SubMConv3d(192, 192, 3, bias=False, indice_key="c4", algo=algo),
            # nn.BatchNorm1d(128),
            # nn.ReLU(),
            spconv.SparseMaxPool3d(2, 2),
-            spconv.SubMConv3d(192, 224, 3, bias=False, indice_key="c5"),
-            spconv.SubMConv3d(224, 224, 3, bias=False, indice_key="c5"),
+            spconv.SubMConv3d(192, 224, 3, bias=False, indice_key="c5", algo=algo),
+            spconv.SubMConv3d(224, 224, 3, bias=False, indice_key="c5", algo=algo),
            # nn.BatchNorm1d(128),
            # nn.ReLU(),
            spconv.SparseMaxPool3d(2, 2),
-            spconv.SubMConv3d(224, 256, 3, bias=False, indice_key="c6"),
-            spconv.SubMConv3d(256, 256, 3, bias=False, indice_key="c6"),
+            spconv.SubMConv3d(224, 256, 3, bias=False, indice_key="c6", algo=algo),
+            spconv.SubMConv3d(256, 256, 3, bias=False, indice_key="c6", algo=algo),
        )
        max_batch_size = 1
        # grid (dense map) is used for indice generation. use pre-allocated grid can run faster.
@@ -76,7 +76,8 @@ def main():
    voxels, coors, spatial_shape = waymo_data()
    voxels_th = torch.from_numpy(voxels).cuda().float()
    coors_th = torch.from_numpy(coors).cuda().int()
-    net = Net(spatial_shape[::-1]).cuda().eval().float()
+    algo = spconv.ConvAlgo.Native
+    net = Net(spatial_shape[::-1], algo).cuda().eval().float()
    print(coors_th.shape)
    out = net(voxels_th, coors_th, 1)
    print(out.spatial_shape)

--- a/test/test_conv.py
+++ b/test/test_conv.py
-# Copyright 2019 Yan Yan
+# Copyright 2019-2020 Yan Yan
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -37,7 +37,7 @@ class SparseConv3dTestTorch(nn.Module):
                 stride,
                 padding,
                 dilation,
-                 algo=spconv.ConvAlgo.SparseConvNet):
+                 algo=spconv.ConvAlgo.Minkowski):
        super().__init__()
        layers = [
            spconv.SparseConv3d(in_channels,