initial release

3f1316d5 · traveller59 · a347176a · 3f1316d5 · 3f1316d5 · 3f1316d5
Commit 3f1316d5 authored Jan 20, 2019 by traveller59
20 changed files
--- a/include/spconv/spconv_ops.h
+++ b/include/spconv/spconv_ops.h
--- a/include/tensorview/helper_kernel.cu.h
+++ b/include/tensorview/helper_kernel.cu.h
+#pragma once
+// from tensorflow
+namespace tv
+{
+namespace detail
+{
+// Range object usable in a device-side range-based for loop. Iteration starts
+// at `begin`, advances by `delta` on every increment and ends once the index
+// is no longer smaller than `end`.
+template <typename T>
+class KernelLoop
+{
+  // Iterator over the strided index sequence. An iterator whose delta_ is 0
+  // acts as the end sentinel (see KernelLoop::end()).
+  struct Iterator
+  {
+    __forceinline__ __device__ Iterator(T index, T delta) : index_(index), delta_(delta) {}
+    // Current index value.
+    __forceinline__ __device__ T operator*() const { return index_; }
+    // Advances the index by the stride.
+    __forceinline__ __device__ Iterator &operator++()
+    {
+      index_ += delta_;
+      return *this;
+    }
+    // Inequality used as the loop condition. Against an end sentinel this is
+    // "index_ < end", so an index that steps past `end` also terminates.
+    __forceinline__ __device__ bool operator!=(const Iterator &other) const
+    {
+      bool greater = index_ > other.index_;
+      bool less = index_ < other.index_;
+      // Anything past an end iterator (delta_ == 0) is equal.
+      // In range-based for loops, this optimizes to 'return less'.
+      if (!other.delta_)
+      {
+        return less;
+      }
+      // Mirror case: *this is the end sentinel and `other` is a live iterator.
+      if (!delta_)
+      {
+        return greater;
+      }
+      // Two live iterators: plain index inequality.
+      return less || greater;
+    }
+  private:
+    T index_;
+    const T delta_;
+  };
+public:
+  // Stores the first index, the stride and the exclusive upper bound.
+  __forceinline__ __device__ KernelLoop(T begin, T delta, T end)
+      : begin_(begin), delta_(delta), end_(end) {}
+  // Live iterator positioned at begin_ with stride delta_.
+  __forceinline__ __device__ Iterator begin() const { return Iterator{begin_, delta_}; }
+  // End sentinel: carries end_ and a zero stride.
+  __forceinline__ __device__ Iterator end() const { return Iterator{end_, 0}; }
+private:
+  T begin_;
+  T delta_;
+  T end_;
+};
+} // namespace detail
+// Helper to visit indices below `count` using the x-coordinate.
+// Usage: for(int i : KernelLoopX(count)) { visit(i); }
+// Each thread starts at blockIdx.x * blockDim.x + threadIdx.x and advances by
+// gridDim.x * blockDim.x * NumILP.
+// The built-in index variables are 32-bit unsigned; they are converted to T
+// before the arithmetic so that a 64-bit T does not inherit a 32-bit
+// wrap-around.
+template <typename T, int NumILP=1>
+__forceinline__ __device__ detail::KernelLoop<T> KernelLoopX(T count)
+{
+  const T first = static_cast<T>(blockIdx.x) * static_cast<T>(blockDim.x) +
+                  static_cast<T>(threadIdx.x);
+  const T step = static_cast<T>(gridDim.x) * static_cast<T>(blockDim.x) *
+                 static_cast<T>(NumILP);
+  return detail::KernelLoop<T>(first, step, count);
+}
+// Helper to visit indices in the range 0 <= i < count using the y-coordinate.
+// Usage: for(int i : KernelLoopY(count)) { visit(i); }
+// Each thread starts at blockIdx.y * blockDim.y + threadIdx.y and advances by
+// gridDim.y * blockDim.y * NumILP.
+// The built-in index variables are 32-bit unsigned; they are converted to T
+// before the arithmetic so that a 64-bit T does not inherit a 32-bit
+// wrap-around.
+template <typename T, int NumILP=1>
+__forceinline__ __device__ detail::KernelLoop<T> KernelLoopY(T count)
+{
+  const T first = static_cast<T>(blockIdx.y) * static_cast<T>(blockDim.y) +
+                  static_cast<T>(threadIdx.y);
+  const T step = static_cast<T>(gridDim.y) * static_cast<T>(blockDim.y) *
+                 static_cast<T>(NumILP);
+  return detail::KernelLoop<T>(first, step, count);
+}
+// Helper to visit indices in the range 0 <= i < count using the z-coordinate.
+// Usage: for(int i : KernelLoopZ(count)) { visit(i); }
+// Each thread starts at blockIdx.z * blockDim.z + threadIdx.z and advances by
+// gridDim.z * blockDim.z * NumILP.
+// The built-in index variables are 32-bit unsigned; they are converted to T
+// before the arithmetic so that a 64-bit T does not inherit a 32-bit
+// wrap-around.
+template <typename T, int NumILP=1>
+__forceinline__ __device__ detail::KernelLoop<T> KernelLoopZ(T count)
+{
+  const T first = static_cast<T>(blockIdx.z) * static_cast<T>(blockDim.z) +
+                  static_cast<T>(threadIdx.z);
+  const T step = static_cast<T>(gridDim.z) * static_cast<T>(blockDim.z) *
+                 static_cast<T>(NumILP);
+  return detail::KernelLoop<T>(first, step, count);
+}
+} // namespace tv
\ No newline at end of file
--- a/include/tensorview/helper_launch.h
+++ b/include/tensorview/helper_launch.h
+#pragma once
+// from pytorch.aten
+#include "tensorview.h"
+namespace tv
+{
+namespace launch
+{
+// Computes (a + b - 1) / b and returns the quotient as int; for positive
+// integral operands this is a / b rounded up.
+template <typename T1, typename T2>
+inline int DivUp(const T1 a, const T2 b)
+{
+    const auto padded = a + b - 1;
+    return padded / b;
+}
+// Use 1024 threads per block, which requires cuda sm_2x or above
+constexpr int CUDA_NUM_THREADS = 1024;
+// CUDA: number of blocks of CUDA_NUM_THREADS threads needed to cover N items.
+// N must be positive; TV_ASSERT_RT_ERR reports the offending value otherwise.
+inline int getBlocks(const int N)
+{
+    TV_ASSERT_RT_ERR(N > 0, "CUDA kernel launch blocks must be positive, but got N=", N);
+    const int numBlocks = DivUp(N, CUDA_NUM_THREADS);
+    return numBlocks;
+}
+} // namespace launch
+} // namespace tv
\ No newline at end of file
--- a/include/tensorview/tensorview.h
+++ b/include/tensorview/tensorview.h
--- a/include/torch_utils.h
+++ b/include/torch_utils.h
+// Copyright 2019 Yan Yan
+// 
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+// 
+//     http://www.apache.org/licenses/LICENSE-2.0
+// 
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+#pragma once
+#include <tensorview/tensorview.h>
+#include <torch/script.h>
+#include <ATen/ATen.h>
+#include <ATen/cuda/CUDAContext.h>
+namespace tv {
+// tv::GPU variant bound to PyTorch's stream: the constructor stores the result
+// of at::cuda::getCurrentCUDAStream() in mStream.
+// NOTE(review): mStream is presumably a member declared by tv::GPU (not
+// visible in this file) — confirm.
+struct TorchGPU: public tv::GPU {
+  TorchGPU(){
+    // Captured once, at construction time.
+    mStream = at::cuda::getCurrentCUDAStream();
+  }
+};
+// Verifies that the scalar type stored in `tensor` corresponds to the C++
+// element type T (const-qualification of T is ignored).
+// Recognised dtypes are double, float, int and half. A mismatch, or any other
+// dtype, trips TV_ASSERT_RT_ERR with a message that names the tensor's dtype
+// instead of a bare "error".
+template <typename T> void check_torch_dtype(const torch::Tensor &tensor) {
+  using Elem = std::remove_const_t<T>;
+  bool matches = false;
+  const char *dtypeName = "unsupported";
+  switch (tensor.type().scalarType()) {
+  case at::ScalarType::Double:
+    matches = std::is_same<Elem, double>::value;
+    dtypeName = "double";
+    break;
+  case at::ScalarType::Float:
+    matches = std::is_same<Elem, float>::value;
+    dtypeName = "float";
+    break;
+  case at::ScalarType::Int:
+    matches = std::is_same<Elem, int>::value;
+    dtypeName = "int";
+    break;
+  case at::ScalarType::Half:
+    matches = std::is_same<Elem, at::Half>::value;
+    dtypeName = "half";
+    break;
+  default:
+    // Unrecognised dtype: `matches` stays false so the assertion below fires.
+    break;
+  }
+  TV_ASSERT_RT_ERR(matches,
+                   "torch tensor dtype does not match the requested element type, tensor dtype=",
+                   dtypeName);
+}
+// Builds a tv::TensorView<T> from a torch tensor: the dtype is validated with
+// check_torch_dtype<T>, the extents from tensor.sizes() are copied into a
+// tv::Shape, and the view is constructed from the tensor's data pointer and
+// that shape.
+template <typename T>
+tv::TensorView<T> torch2tv(const torch::Tensor &tensor) {
+  check_torch_dtype<T>(tensor);
+  using Elem = std::remove_const_t<T>;
+  tv::Shape dims;
+  const auto extents = tensor.sizes();
+  for (auto it = extents.begin(); it != extents.end(); ++it) {
+    dims.push_back(*it);
+  }
+  return tv::TensorView<T>(tensor.data<Elem>(), dims);
+}
+} // namespace tv
\ No newline at end of file
--- a/include/utility/timer.h
+++ b/include/utility/timer.h
+// Copyright 2019 Yan Yan
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+#pragma once
+#include <chrono>
+#include <cuda_runtime_api.h>
+#include <iostream>
+namespace spconv {
+// Stopwatch for code that launches CUDA work. The device is synchronized
+// before every clock read, so a reported interval includes the work that was
+// still pending when report() was called.
+// TimeT is the std::chrono duration type the result is expressed in.
+template <typename TimeT = std::chrono::microseconds> struct CudaContextTimer {
+  // Starts the first interval after synchronizing the device.
+  CudaContextTimer() {
+    cudaDeviceSynchronize();
+    mCurTime = std::chrono::steady_clock::now();
+  }
+  // Returns the time elapsed since construction or the previous report(), in
+  // units of TimeT, and starts the next interval.
+  typename TimeT::rep report() {
+    cudaDeviceSynchronize();
+    // Read the clock once: the same instant ends this interval and starts the
+    // next one, so consecutive intervals do not lose the time between two
+    // separate clock reads.
+    const auto now = std::chrono::steady_clock::now();
+    const auto elapsed = std::chrono::duration_cast<TimeT>(now - mCurTime);
+    mCurTime = now;
+    return elapsed.count();
+  }
+private:
+  std::chrono::time_point<std::chrono::steady_clock> mCurTime;
+};
+// Host-only stopwatch based on std::chrono::steady_clock.
+// TimeT is the std::chrono duration type the result is expressed in.
+template <typename TimeT = std::chrono::microseconds> struct CPUTimer {
+  // Starts the first interval.
+  CPUTimer() { mCurTime = std::chrono::steady_clock::now(); }
+  // Returns the time elapsed since construction or the previous report(), in
+  // units of TimeT, and starts the next interval.
+  typename TimeT::rep report() {
+    // Read the clock once: the same instant ends this interval and starts the
+    // next one, so consecutive intervals do not lose the time between two
+    // separate clock reads.
+    const auto now = std::chrono::steady_clock::now();
+    const auto elapsed = std::chrono::duration_cast<TimeT>(now - mCurTime);
+    mCurTime = now;
+    return elapsed.count();
+  }
+private:
+  std::chrono::time_point<std::chrono::steady_clock> mCurTime;
+};
+} // namespace spconv
--- a/setup.py
+++ b/setup.py
+import os
+import re
+import sys
+import platform
+import subprocess
+from setuptools import setup, Extension, find_packages
+from setuptools.command.build_ext import build_ext
+from distutils.version import LooseVersion
+# LIBTORCH_ROOT is forwarded to CMake as CMAKE_PREFIX_PATH by
+# CMakeBuild.build_extension, so fail early when it is not set.
+if 'LIBTORCH_ROOT' not in os.environ:
+    raise ValueError("You must set LIBTORCH_ROOT to your torch c++ library.")
+# "major.minor" of the running interpreter; passed to CMake as
+# PYBIND11_PYTHON_VERSION.
+PYTHON_VERSION = "{}.{}".format(sys.version_info.major, sys.version_info.minor)
+class CMakeExtension(Extension):
+    def __init__(self, name, sourcedir='', library_dirs=[]):
+        Extension.__init__(self, name, sources=[], library_dirs=library_dirs)
+        self.sourcedir = os.path.abspath(sourcedir)
+class CMakeBuild(build_ext):
+    def run(self):
+        try:
+            out = subprocess.check_output(['cmake', '--version'])
+        except OSError:
+            raise RuntimeError("CMake must be installed to build the following extensions: " +
+                               ", ".join(e.name for e in self.extensions))
+        if platform.system() == "Windows":
+            raise NotImplementedError
+        for ext in self.extensions:
+            self.build_extension(ext)
+    def build_extension(self, ext):
+        extdir = os.path.abspath(os.path.dirname(self.get_ext_fullpath(ext.name)))
+        print(extdir)
+        cmake_args = ['-DCMAKE_LIBRARY_OUTPUT_DIRECTORY=' + extdir + "/spconv",
+                      '-DCMAKE_PREFIX_PATH=' + os.environ["LIBTORCH_ROOT"],
+                      '-DPYBIND11_PYTHON_VERSION={}'.format(PYTHON_VERSION),
+                      '-DSPCONV_BuildTests=OFF',
+                      '-DCMAKE_CUDA_FLAGS="--expt-relaxed-constexpr"']
+        cfg = 'Debug' if self.debug else 'Release'
+        # cfg = 'Debug'
+        build_args = ['--config', cfg]
+        print(cfg)
+        if platform.system() == "Windows":
+            cmake_args += ['-DCMAKE_LIBRARY_OUTPUT_DIRECTORY_{}={}'.format(cfg.upper(), extdir)]
+            if sys.maxsize > 2**32:
+                cmake_args += ['-A', 'x64']
+            build_args += ['--', '/m']
+        else:
+            cmake_args += ['-DCMAKE_BUILD_TYPE=' + cfg]
+            build_args += ['--', '-j4']
+        env = os.environ.copy()
+        env['CXXFLAGS'] = '{} -DVERSION_INFO=\\"{}\\"'.format(env.get('CXXFLAGS', ''),
+                                                              self.distribution.get_version())
+        if not os.path.exists(self.build_temp):
+            os.makedirs(self.build_temp)
+        subprocess.check_call(['cmake', ext.sourcedir] + cmake_args, cwd=self.build_temp, env=env)
+        subprocess.check_call(['cmake', '--build', '.'] + build_args, cwd=self.build_temp)
+# Collect every package except the 'tools' tree.
+packages = find_packages(exclude=('tools', 'tools.*'))
+# The single CMakeExtension is built by the CMakeBuild command registered
+# below in place of the default build_ext.
+setup(
+    name='spconv',
+    version='1.0',
+    author='Yan Yan',
+    author_email='scrin@foxmail.com',
+    description='spatial sparse convolution for pytorch',
+    long_description='',
+    setup_requires = ['torch>=1.0.0'],
+    packages=packages,
+    package_dir = {'spconv': 'spconv'},
+    ext_modules=[CMakeExtension('spconv', library_dirs=[])],
+    cmdclass=dict(build_ext=CMakeBuild),
+    zip_safe=False,
+)
--- a/spconv/__init__.py
+++ b/spconv/__init__.py
+# Copyright 2019 Yan Yan
+# 
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+# 
+#     http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+from pathlib import Path
+import numpy as np
+import torch
+from spconv import utils
+from spconv.conv import SparseConv2d, SparseConv3d, SubMConv2d, SubMConv3d
+from spconv.conv import SparseConvTranspose2d, SparseConvTranspose3d
+from spconv.conv import SparseInverseConv2d, SparseInverseConv3d
+from spconv.modules import SparseModule, SparseSequential
+from spconv.pool import SparseMaxPool2d, SparseMaxPool3d
+# Path of libspconv.so in the same directory as this file; loading it through
+# torch.ops.load_library happens at import time of this package.
+_LIB_PATH = str(Path(__file__).parent / "libspconv.so")
+torch.ops.load_library(_LIB_PATH)
+def scatter_nd(indices, updates, shape):
+    """pytorch edition of tensorflow scatter_nd.
+    this function don't contain except handle code. so use this carefully
+    when indice repeats, don't support repeat add which is supported
+    in tensorflow.
+    """
+    ret = torch.zeros(*shape, dtype=updates.dtype, device=updates.device)
+    ndim = indices.shape[-1]
+    output_shape = list(indices.shape[:-1]) + shape[indices.shape[-1]:]
+    flatted_indices = indices.view(-1, ndim)
+    slices = [flatted_indices[:, i] for i in range(ndim)]
+    slices += [Ellipsis]
+    ret[slices] = updates.view(*output_shape)
+    return ret
+class SparseConvTensor(object):
+    def __init__(self, features, indices, spatial_shape, batch_size, grid=None):
+        """
+        Args:
+            grid: pre-allocated grid tensor. should be used when the volume of spatial shape
+                is very large.
+        """
+        self.features = features
+        self.indices = indices 
+        if self.indices.dtype != torch.int32:
+            self.indices.int()
+        self.spatial_shape = spatial_shape
+        self.batch_size = batch_size
+        self.indice_dict = {}
+        self.grid = grid
+    @property
+    def spatial_size(self):
+        return np.prod(self.spatial_shape)
+    def find_indice_pair(self, key):
+        if key is None:
+            return None 
+        if key in self.indice_dict:
+            return self.indice_dict[key]
+        return None
+    def dense(self, channels_first=True):
+        output_shape = [self.batch_size] + list(self.spatial_shape) + [self.features.shape[1]]
+        res = scatter_nd(self.indices.long(), self.features, output_shape)
+        if not channels_first:
+            return res
+        ndim = len(self.spatial_shape)
+        trans_params = list(range(0, ndim + 1))
+        trans_params.insert(1, ndim + 1)
+        return res.permute(*trans_params).contiguous()
+    @property
+    def sparity(self):
+        return self.indices.shape[0] / np.prod(self.spatial_shape) / self.batch_size
+class ToDense(SparseModule):
+    """Convert a SparseConvTensor to a dense tensor by calling its dense()
+    method with default arguments (channels-first layout, e.g. NCHW in 2-D).
+    """
+    def forward(self, x: SparseConvTensor):
+        return x.dense()
+class RemoveGrid(SparseModule):
+    """Drop the pre-allocated grid buffer: sets ``x.grid`` to None on the input
+    tensor itself and returns that same tensor.
+    """
+    def forward(self, x: SparseConvTensor):
+        x.grid = None
+        return x
\ No newline at end of file
--- a/spconv/conv.py
+++ b/spconv/conv.py
+# Copyright 2019 Yan Yan
+# 
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+# 
+#     http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import math
+import time
+import numpy as np
+import spconv
+import spconv.functional as Fsp
+import torch
+from spconv import ops
+from spconv.modules import SparseModule
+from torch import nn
+from torch.nn import init
+from torch.nn.parameter import Parameter
+def _calculate_fan_in_and_fan_out_hwio(tensor):
+    dimensions = tensor.ndimension()
+    if dimensions < 2:
+        raise ValueError(
+            "Fan in and fan out can not be computed for tensor with fewer than 2 dimensions"
+        )
+    if dimensions == 2:  # Linear
+        fan_in = tensor.size(-2)
+        fan_out = tensor.size(-1)
+    else:
+        num_input_fmaps = tensor.size(-2)
+        num_output_fmaps = tensor.size(-1)
+        receptive_field_size = 1
+        if tensor.dim() > 2:
+            receptive_field_size = tensor[..., 0, 0].numel()
+        fan_in = num_input_fmaps * receptive_field_size
+        fan_out = num_output_fmaps * receptive_field_size
+    return fan_in, fan_out
+class SparseConvolution(SparseModule):
+    def __init__(self,
+                 ndim,
+                 in_channels,
+                 out_channels,
+                 kernel_size,
+                 stride=1,
+                 padding=0,
+                 dilation=1,
+                 groups=1,
+                 bias=True,
+                 subm=False,
+                 output_padding=0,
+                 transposed=False,
+                 inverse=False,
+                 indice_key=None):
+        super(SparseConvolution, self).__init__()
+        assert groups == 1
+        if not isinstance(kernel_size, (list, tuple)):
+            kernel_size = [kernel_size] * ndim
+        if not isinstance(stride, (list, tuple)):
+            stride = [stride] * ndim
+        if not isinstance(padding, (list, tuple)):
+            padding = [padding] * ndim
+        if not isinstance(dilation, (list, tuple)):
+            dilation = [dilation] * ndim
+        if not isinstance(output_padding, (list, tuple)):
+            output_padding = [output_padding] * ndim
+        for d, s in zip(dilation, stride):
+            assert any([s == 1, d == 1]), "don't support this."
+        self.ndim = ndim
+        self.in_channels = in_channels
+        self.out_channels = out_channels
+        self.kernel_size = kernel_size
+        self.conv1x1 = np.prod(kernel_size) == 1
+        self.stride = stride
+        self.padding = padding
+        self.dilation = dilation
+        self.transposed = transposed
+        self.inverse = inverse
+        self.output_padding = output_padding
+        self.groups = groups
+        self.subm = subm
+        self.indice_key = indice_key
+        self.weight = Parameter(
+            torch.Tensor(*kernel_size, in_channels, out_channels))
+        if bias:
+            self.bias = Parameter(torch.Tensor(out_channels))
+        else:
+            self.register_parameter('bias', None)
+        self.reset_parameters()
+    def reset_parameters(self):
+        n = self.in_channels
+        init.kaiming_uniform_(self.weight, a=math.sqrt(5))
+        if self.bias is not None:
+            fan_in, _ = _calculate_fan_in_and_fan_out_hwio(self.weight)
+            bound = 1 / math.sqrt(fan_in)
+            init.uniform_(self.bias, -bound, bound)
+    def forward(self, input):
+        assert isinstance(input, spconv.SparseConvTensor)
+        features = input.features
+        device = features.device
+        indices = input.indices
+        spatial_shape = input.spatial_shape
+        batch_size = input.batch_size
+        if not self.subm:
+            if self.transposed:
+                out_spatial_shape = ops.get_deconv_output_size(
+                    spatial_shape, self.kernel_size, self.stride, self.padding, self.dilation, self.output_padding)
+            else:
+                out_spatial_shape = ops.get_conv_output_size(
+                    spatial_shape, self.kernel_size, self.stride, self.padding, self.dilation)
+        else:
+            out_spatial_shape = spatial_shape
+        # input.update_grid(out_spatial_shape)
+        # t = time.time()
+        if self.conv1x1:
+            input.features = torch.mm(
+                input.features,
+                self.weight.view(self.in_channels, self.out_channels))
+            if self.bias:
+                input.features += self.bias
+            return input
+        datas = input.find_indice_pair(self.indice_key)
+        if self.inverse:
+            assert datas is not None and self.indice_key is not None
+            _, outids, indice_pairs, indice_pair_num, out_spatial_shape = datas
+        else:
+            if self.indice_key is not None and datas is not None:
+                outids, _, indice_pairs, indice_pair_num, _ = datas
+            else:
+                outids, indice_pairs, indice_pair_num = ops.get_indice_pairs(
+                    indices, batch_size, spatial_shape, self.kernel_size,
+                    self.stride, self.padding, self.dilation, self.output_padding, self.subm, self.transposed, grid=input.grid)
+                input.indice_dict[self.indice_key] = (outids, indices, indice_pairs, indice_pair_num, spatial_shape)
+        if self.subm:
+            out_features = Fsp.indice_subm_conv(features, self.weight,
+                                              indice_pairs.to(device),
+                                              indice_pair_num,
+                                              outids.shape[0])
+        else:
+            if self.inverse:
+                out_features = Fsp.indice_inverse_conv(features,
+                                            self.weight, indice_pairs.to(device),
+                                            indice_pair_num, outids.shape[0])
+            else:
+                out_features = Fsp.indice_conv(features,
+                                            self.weight, indice_pairs.to(device),
+                                            indice_pair_num, outids.shape[0])
+        if self.bias:
+            out_features += self.bias
+        out_tensor = spconv.SparseConvTensor(out_features, outids,
+                                             out_spatial_shape, batch_size)
+        out_tensor.indice_dict = input.indice_dict
+        out_tensor.grid = input.grid
+        return out_tensor
+class SparseConv2d(SparseConvolution):
+    def __init__(self,
+                 in_channels,
+                 out_channels,
+                 kernel_size,
+                 stride=1,
+                 padding=0,
+                 dilation=1,
+                 groups=1,
+                 bias=True,
+                 indice_key=None):
+        super(SparseConv2d, self).__init__(
+            2,
+            in_channels,
+            out_channels,
+            kernel_size,
+            stride,
+            padding,
+            dilation,
+            groups,
+            bias,
+            indice_key=indice_key)
+class SparseConv3d(SparseConvolution):
+    def __init__(self,
+                 in_channels,
+                 out_channels,
+                 kernel_size,
+                 stride=1,
+                 padding=0,
+                 dilation=1,
+                 groups=1,
+                 bias=True,
+                 indice_key=None):
+        super(SparseConv3d, self).__init__(
+            3,
+            in_channels,
+            out_channels,
+            kernel_size,
+            stride,
+            padding,
+            dilation,
+            groups,
+            bias,
+            indice_key=indice_key)
+class SparseConvTranspose2d(SparseConvolution):
+    def __init__(self,
+                 in_channels,
+                 out_channels,
+                 kernel_size,
+                 stride=1,
+                 padding=0,
+                 dilation=1,
+                 groups=1,
+                 bias=True,
+                 indice_key=None):
+        super(SparseConvTranspose2d, self).__init__(
+            2,
+            in_channels,
+            out_channels,
+            kernel_size,
+            stride,
+            padding,
+            dilation,
+            groups,
+            bias,
+            transposed=True,
+            indice_key=indice_key)
+class SparseConvTranspose3d(SparseConvolution):
+    def __init__(self,
+                 in_channels,
+                 out_channels,
+                 kernel_size,
+                 stride=1,
+                 padding=0,
+                 dilation=1,
+                 groups=1,
+                 bias=True,
+                 indice_key=None):
+        super(SparseConvTranspose3d, self).__init__(
+            3,
+            in_channels,
+            out_channels,
+            kernel_size,
+            stride,
+            padding,
+            dilation,
+            groups,
+            bias,
+            transposed=True,
+            indice_key=indice_key)
+class SparseInverseConv2d(SparseConvolution):
+    def __init__(self,
+                 in_channels,
+                 out_channels,
+                 indice_key,
+                 bias=True):
+        super(SparseInverseConv2d, self).__init__(
+            2,
+            in_channels,
+            out_channels,
+            bias=bias,
+            inverse=True,
+            indice_key=indice_key)
+class SparseInverseConv3d(SparseConvolution):
+    def __init__(self,
+                 in_channels,
+                 out_channels,
+                 indice_key,
+                 bias=True):
+        super(SparseInverseConv3d, self).__init__(
+            3,
+            in_channels,
+            out_channels,
+            bias=bias,
+            inverse=True,
+            indice_key=indice_key)
+class SubMConv2d(SparseConvolution):
+    def __init__(self,
+                 in_channels,
+                 out_channels,
+                 kernel_size,
+                 stride=1,
+                 padding=0,
+                 dilation=1,
+                 groups=1,
+                 bias=True,
+                 indice_key=None):
+        super(SubMConv2d, self).__init__(
+            2,
+            in_channels,
+            out_channels,
+            kernel_size,
+            stride,
+            padding,
+            dilation,
+            groups,
+            bias,
+            True,
+            indice_key=indice_key)
+class SubMConv3d(SparseConvolution):
+    def __init__(self,
+                 in_channels,
+                 out_channels,
+                 kernel_size,
+                 stride=1,
+                 padding=0,
+                 dilation=1,
+                 groups=1,
+                 bias=True,
+                 indice_key=None):
+        super(SubMConv3d, self).__init__(
+            3,
+            in_channels,
+            out_channels,
+            kernel_size,
+            stride,
+            padding,
+            dilation,
+            groups,
+            bias,
+            True,
+            indice_key=indice_key)
--- a/spconv/functional.py
+++ b/spconv/functional.py
--- a/spconv/modules.py
+++ b/spconv/modules.py
--- a/spconv/ops.py
+++ b/spconv/ops.py
--- a/spconv/pool.py
+++ b/spconv/pool.py
--- a/spconv/test_utils.py
+++ b/spconv/test_utils.py
--- a/spconv/utils/__init__.py
+++ b/spconv/utils/__init__.py
--- a/src/spconv/CMakeLists.txt
+++ b/src/spconv/CMakeLists.txt
+# Shared library "spconv" built from the host (.cc) and CUDA (.cu) sources.
+add_library(spconv SHARED all.cc indice.cc indice.cu 
+            reordering.cc reordering.cu maxpool.cc maxpool.cu)
+# NOTE(review): ALL_INCLUDE and ALL_LIBS are presumably set by a parent
+# CMakeLists that is not part of this file — confirm.
+target_include_directories(spconv PRIVATE ${ALL_INCLUDE} )
+# Compile both host and device code as C++14.
+set_property(TARGET spconv PROPERTY CUDA_STANDARD 14)
+set_property(TARGET spconv PROPERTY CXX_STANDARD 14)
+# Enable separable compilation for the CUDA sources.
+set_target_properties(spconv PROPERTIES CUDA_SEPARABLE_COMPILATION ON)
+target_link_libraries(spconv PRIVATE ${ALL_LIBS})
+install (TARGETS spconv DESTINATION lib)
--- a/src/spconv/all.cc
+++ b/src/spconv/all.cc
+// Copyright 2019 Yan Yan
+// 
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+// 
+//     http://www.apache.org/licenses/LICENSE-2.0
+// 
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+#include <cuda_runtime_api.h>
+#include <spconv/pool_ops.h>
+#include <spconv/spconv_ops.h>
+// Registers the spconv functions as torch::jit operators under the "spconv::"
+// namespace when this translation unit is loaded. Index-pair generation is
+// registered for 2 and 3 dimensions; convolution and max-pooling (forward and
+// backward) are registered for float and at::Half only.
+// NOTE(review): the Python package presumably reaches these through
+// torch.ops after load_library — confirm against spconv/ops.py.
+static auto registry =
+    torch::jit::RegisterOperators("spconv::get_indice_pairs_2d", &spconv::getIndicePair<2>)
+        .op("spconv::get_indice_pairs_3d", &spconv::getIndicePair<3>)
+        .op("spconv::get_indice_pairs_grid_2d", &spconv::getIndicePairPreGrid<2>)
+        .op("spconv::get_indice_pairs_grid_3d", &spconv::getIndicePairPreGrid<3>)
+        .op("spconv::indice_conv_fp32", &spconv::indiceConv<float>)
+        .op("spconv::indice_conv_backward_fp32", &spconv::indiceConvBackward<float>)
+        .op("spconv::indice_conv_half", &spconv::indiceConv<at::Half>)
+        .op("spconv::indice_conv_backward_half",
+            &spconv::indiceConvBackward<at::Half>)
+        .op("spconv::indice_maxpool_fp32", &spconv::indiceMaxPool<float>)
+        .op("spconv::indice_maxpool_backward_fp32",
+            &spconv::indiceMaxPoolBackward<float>)
+        .op("spconv::indice_maxpool_half", &spconv::indiceMaxPool<at::Half>)
+        .op("spconv::indice_maxpool_backward_half",
+            &spconv::indiceMaxPoolBackward<at::Half>);
\ No newline at end of file
--- a/src/spconv/indice.cc
+++ b/src/spconv/indice.cc
--- a/src/spconv/indice.cu
+++ b/src/spconv/indice.cu
--- a/src/spconv/maxpool.cc
+++ b/src/spconv/maxpool.cc