Commit 2f66dd23 authored by yan.yan
Browse files

fix cpu only build problem

parent 6a32c34f
...@@ -112,6 +112,8 @@ We offer python 3.7-3.11 and cuda 10.2/11.4/11.7/12.0 prebuilt binaries for wind ...@@ -112,6 +112,8 @@ We offer python 3.7-3.11 and cuda 10.2/11.4/11.7/12.0 prebuilt binaries for wind
For Linux users, you need to install pip >= 20.3 first to install the prebuilt binaries. For Linux users, you need to install pip >= 20.3 first to install the prebuilt binaries.
**WARNING**: spconv-cu117 may require CUDA Driver >= 515.
```pip install spconv``` for CPU only (**Linux Only**). You should only use this for debugging; performance isn't optimized due to the manylinux limit (no OpenMP support). ```pip install spconv``` for CPU only (**Linux Only**). You should only use this for debugging; performance isn't optimized due to the manylinux limit (no OpenMP support).
```pip install spconv-cu102``` for CUDA 10.2 ```pip install spconv-cu102``` for CUDA 10.2
......
[build-system] [build-system]
requires = ["setuptools>=41.0", "wheel", "pccm>=0.4.0", "cumm>=0.3.0"] requires = ["setuptools>=41.0", "wheel", "pccm>=0.4.0", "cumm>=0.3.1"]
build-backend = "setuptools.build_meta" build-backend = "setuptools.build_meta"
...@@ -38,9 +38,9 @@ if cuda_ver: ...@@ -38,9 +38,9 @@ if cuda_ver:
cuda_ver = cuda_ver.replace(".", "") # 10.2 to 102 cuda_ver = cuda_ver.replace(".", "") # 10.2 to 102
RELEASE_NAME += "-cu{}".format(cuda_ver) RELEASE_NAME += "-cu{}".format(cuda_ver)
deps = ["cumm-cu{}>=0.3.0".format(cuda_ver)] deps = ["cumm-cu{}>=0.3.1".format(cuda_ver)]
else: else:
deps = ["cumm>=0.3.0"] deps = ["cumm>=0.3.1"]
......
...@@ -64,7 +64,7 @@ def get_gemm_algo_desp_from_param(p: GemmAlgoParams): ...@@ -64,7 +64,7 @@ def get_gemm_algo_desp_from_param(p: GemmAlgoParams):
desp.element_per_access_a = ker.input_spec.input_iter_a.element_per_acc desp.element_per_access_a = ker.input_spec.input_iter_a.element_per_acc
desp.element_per_access_b = ker.input_spec.input_iter_b.element_per_acc desp.element_per_access_b = ker.input_spec.input_iter_b.element_per_acc
desp.element_per_access_c = ker.output_spec.out_iter.element_per_acc desp.element_per_access_c = ker.output_spec.out_iter.element_per_acc
desp.min_arch = ker.min_arch()
return desp return desp
...@@ -87,6 +87,7 @@ def get_conv_algo_desp_from_param(p: ConvAlgoParams): ...@@ -87,6 +87,7 @@ def get_conv_algo_desp_from_param(p: ConvAlgoParams):
desp.element_per_access_a = ker.input_spec.input_iter_a.element_per_acc desp.element_per_access_a = ker.input_spec.input_iter_a.element_per_acc
desp.element_per_access_b = ker.input_spec.input_iter_b.element_per_acc desp.element_per_access_b = ker.input_spec.input_iter_b.element_per_acc
desp.element_per_access_c = ker.output_spec.out_iter.element_per_acc desp.element_per_access_c = ker.output_spec.out_iter.element_per_acc
desp.min_arch = ker.min_arch()
return desp return desp
......
...@@ -13,14 +13,10 @@ ...@@ -13,14 +13,10 @@
# limitations under the License. # limitations under the License.
import spconv.core_cc as _ext import spconv.core_cc as _ext
if hasattr(_ext, "cumm"):
CPU_ONLY_BUILD = False
else:
CPU_ONLY_BUILD = True
from spconv.core_cc.csrc.sparse.all import SpconvOps from spconv.core_cc.csrc.sparse.all import SpconvOps
CPU_ONLY_BUILD = SpconvOps.is_cpu_only_build()
BUILD_CUMM_VERSION = SpconvOps.cumm_version() BUILD_CUMM_VERSION = SpconvOps.cumm_version()
BUILD_PCCM_VERSION = SpconvOps.pccm_version() BUILD_PCCM_VERSION = SpconvOps.pccm_version()
from spconv.core_cc.csrc.utils.boxops import BoxOps from spconv.core_cc.csrc.utils.boxops import BoxOps
......
...@@ -84,6 +84,10 @@ class HashCoreHost(pccm.Class): ...@@ -84,6 +84,10 @@ class HashCoreHost(pccm.Class):
self.add_include("tensorview/hash/hash_core.h") self.add_include("tensorview/hash/hash_core.h")
class SpconvOps(pccm.Class): class SpconvOps(pccm.Class):
# Pick, once at class level, the static-function decorator applied as
# @_STATIC_FUNCTION to methods of this class: the plain pccm decorator when
# CUMM_CPU_ONLY_BUILD is set (a cpu only build can't use pccm.cuda), and the
# pccm.cuda decorator otherwise.
if CUMM_CPU_ONLY_BUILD:
_STATIC_FUNCTION = pccm.static_function
else:
_STATIC_FUNCTION = pccm.cuda.static_function
def __init__(self): def __init__(self):
super().__init__() super().__init__()
self.add_dependency(ThrustCustomAllocatorV2, ExternalAllocator, GemmBasicHost, ThrustAllocator) self.add_dependency(ThrustCustomAllocatorV2, ExternalAllocator, GemmBasicHost, ThrustAllocator)
...@@ -143,6 +147,15 @@ class SpconvOps(pccm.Class): ...@@ -143,6 +147,15 @@ class SpconvOps(pccm.Class):
""") """)
return code.ret("std::string") return code.ret("std::string")
# Code generator for a static function that reports whether this is a
# CPU-only build. It is marked with pccm.pybind.mark; the Python side of
# this project reads it as SpconvOps.is_cpu_only_build().
# It is decorated with the plain pccm.static_function in both build modes.
@pccm.pybind.mark
@pccm.static_function
def is_cpu_only_build(self):
code = pccm.FunctionCode()
# The f-string is evaluated while this generator runs, so the emitted
# function body returns the value of CUMM_CPU_ONLY_BUILD (rendered by
# pccm.literal) as a fixed constant.
code.raw(f"""
return {pccm.literal(CUMM_CPU_ONLY_BUILD)};
""")
# Declare the generated function's return type as bool.
return code.ret("bool")
@pccm.pybind.mark @pccm.pybind.mark
@pccm.static_function @pccm.static_function
def pccm_version(self): def pccm_version(self):
...@@ -155,7 +168,7 @@ class SpconvOps(pccm.Class): ...@@ -155,7 +168,7 @@ class SpconvOps(pccm.Class):
return code.ret("std::string") return code.ret("std::string")
@pccm.pybind.mark @pccm.pybind.mark
@pccm.cuda.static_function @_STATIC_FUNCTION
def generate_conv_inds_stage1(self): def generate_conv_inds_stage1(self):
code = pccm.FunctionCode() code = pccm.FunctionCode()
code.arg("indices", "tv::Tensor") code.arg("indices", "tv::Tensor")
...@@ -200,7 +213,7 @@ class SpconvOps(pccm.Class): ...@@ -200,7 +213,7 @@ class SpconvOps(pccm.Class):
return code # .ret("int") return code # .ret("int")
@pccm.pybind.mark @pccm.pybind.mark
@pccm.cuda.static_function @_STATIC_FUNCTION
def generate_conv_inds_stage1_5(self): def generate_conv_inds_stage1_5(self):
code = pccm.FunctionCode() code = pccm.FunctionCode()
code.arg("indice_pairs_uniq", "tv::Tensor") code.arg("indice_pairs_uniq", "tv::Tensor")
...@@ -219,7 +232,7 @@ class SpconvOps(pccm.Class): ...@@ -219,7 +232,7 @@ class SpconvOps(pccm.Class):
return code.ret("int") return code.ret("int")
@pccm.pybind.mark @pccm.pybind.mark
@pccm.cuda.static_function @_STATIC_FUNCTION
def generate_conv_inds_stage2(self): def generate_conv_inds_stage2(self):
code = pccm.FunctionCode() code = pccm.FunctionCode()
code.arg("indices, hashdata_k, hashdata_v", "tv::Tensor") code.arg("indices, hashdata_k, hashdata_v", "tv::Tensor")
...@@ -270,7 +283,7 @@ class SpconvOps(pccm.Class): ...@@ -270,7 +283,7 @@ class SpconvOps(pccm.Class):
return code.ret("int") return code.ret("int")
@pccm.pybind.mark @pccm.pybind.mark
@pccm.cuda.static_function @_STATIC_FUNCTION
def generate_conv_inds_mask_stage1(self): def generate_conv_inds_mask_stage1(self):
code = pccm.FunctionCode() code = pccm.FunctionCode()
if CUMM_CPU_ONLY_BUILD: if CUMM_CPU_ONLY_BUILD:
...@@ -316,7 +329,7 @@ class SpconvOps(pccm.Class): ...@@ -316,7 +329,7 @@ class SpconvOps(pccm.Class):
return code # .ret("int") return code # .ret("int")
@pccm.pybind.mark @pccm.pybind.mark
@pccm.cuda.static_function @_STATIC_FUNCTION
def generate_conv_inds_mask_stage1_direct_table(self): def generate_conv_inds_mask_stage1_direct_table(self):
code = pccm.FunctionCode() code = pccm.FunctionCode()
if CUMM_CPU_ONLY_BUILD: if CUMM_CPU_ONLY_BUILD:
...@@ -362,7 +375,7 @@ class SpconvOps(pccm.Class): ...@@ -362,7 +375,7 @@ class SpconvOps(pccm.Class):
return code # .ret("int") return code # .ret("int")
@pccm.pybind.mark @pccm.pybind.mark
@pccm.cuda.static_function @_STATIC_FUNCTION
def unique_hash(self): def unique_hash(self):
code = pccm.FunctionCode() code = pccm.FunctionCode()
if CUMM_CPU_ONLY_BUILD: if CUMM_CPU_ONLY_BUILD:
...@@ -378,7 +391,7 @@ class SpconvOps(pccm.Class): ...@@ -378,7 +391,7 @@ class SpconvOps(pccm.Class):
return code.ret("int") return code.ret("int")
@pccm.pybind.mark @pccm.pybind.mark
@pccm.cuda.static_function @_STATIC_FUNCTION
def assign_output_direct_hash(self): def assign_output_direct_hash(self):
code = pccm.FunctionCode() code = pccm.FunctionCode()
if CUMM_CPU_ONLY_BUILD: if CUMM_CPU_ONLY_BUILD:
...@@ -420,7 +433,7 @@ class SpconvOps(pccm.Class): ...@@ -420,7 +433,7 @@ class SpconvOps(pccm.Class):
return code return code
@pccm.pybind.mark @pccm.pybind.mark
@pccm.cuda.static_function @_STATIC_FUNCTION
def generate_conv_inds_mask_stage2(self): def generate_conv_inds_mask_stage2(self):
code = pccm.FunctionCode() code = pccm.FunctionCode()
if CUMM_CPU_ONLY_BUILD: if CUMM_CPU_ONLY_BUILD:
...@@ -470,7 +483,7 @@ class SpconvOps(pccm.Class): ...@@ -470,7 +483,7 @@ class SpconvOps(pccm.Class):
return code.ret("int") return code.ret("int")
@pccm.pybind.mark @pccm.pybind.mark
@pccm.cuda.static_function @_STATIC_FUNCTION
def generate_conv_inds_stage2_mask_direct_table(self): def generate_conv_inds_stage2_mask_direct_table(self):
code = pccm.FunctionCode() code = pccm.FunctionCode()
if CUMM_CPU_ONLY_BUILD: if CUMM_CPU_ONLY_BUILD:
...@@ -519,7 +532,7 @@ class SpconvOps(pccm.Class): ...@@ -519,7 +532,7 @@ class SpconvOps(pccm.Class):
return code.ret("int") return code.ret("int")
@pccm.pybind.mark @pccm.pybind.mark
@pccm.cuda.static_function @_STATIC_FUNCTION
def generate_subm_conv_inds(self): def generate_subm_conv_inds(self):
code = pccm.FunctionCode() code = pccm.FunctionCode()
if CUMM_CPU_ONLY_BUILD: if CUMM_CPU_ONLY_BUILD:
...@@ -634,7 +647,7 @@ class SpconvOps(pccm.Class): ...@@ -634,7 +647,7 @@ class SpconvOps(pccm.Class):
return code.ret("int") return code.ret("int")
@pccm.pybind.mark @pccm.pybind.mark
@pccm.cuda.static_function @_STATIC_FUNCTION
def maxpool_forward(self): def maxpool_forward(self):
code = pccm.FunctionCode() code = pccm.FunctionCode()
if CUMM_CPU_ONLY_BUILD: if CUMM_CPU_ONLY_BUILD:
...@@ -651,7 +664,7 @@ class SpconvOps(pccm.Class): ...@@ -651,7 +664,7 @@ class SpconvOps(pccm.Class):
return code return code
@pccm.pybind.mark @pccm.pybind.mark
@pccm.cuda.static_function @_STATIC_FUNCTION
def maxpool_backward(self): def maxpool_backward(self):
code = pccm.FunctionCode() code = pccm.FunctionCode()
if CUMM_CPU_ONLY_BUILD: if CUMM_CPU_ONLY_BUILD:
...@@ -671,7 +684,7 @@ class SpconvOps(pccm.Class): ...@@ -671,7 +684,7 @@ class SpconvOps(pccm.Class):
@pccm.pybind.mark @pccm.pybind.mark
@pccm.cuda.static_function @_STATIC_FUNCTION
def indice_maxpool(self): def indice_maxpool(self):
code = pccm.FunctionCode() code = pccm.FunctionCode()
code.arg("out_features, features", "tv::Tensor") code.arg("out_features, features", "tv::Tensor")
...@@ -715,7 +728,7 @@ class SpconvOps(pccm.Class): ...@@ -715,7 +728,7 @@ class SpconvOps(pccm.Class):
@pccm.pybind.mark @pccm.pybind.mark
@pccm.cuda.static_function @_STATIC_FUNCTION
def indice_maxpool_backward(self): def indice_maxpool_backward(self):
code = pccm.FunctionCode() code = pccm.FunctionCode()
code.arg("din, features, out_features, out_bp", "tv::Tensor") code.arg("din, features, out_features, out_bp", "tv::Tensor")
...@@ -757,7 +770,7 @@ class SpconvOps(pccm.Class): ...@@ -757,7 +770,7 @@ class SpconvOps(pccm.Class):
return code return code
@pccm.pybind.mark @pccm.pybind.mark
@pccm.cuda.static_function @_STATIC_FUNCTION
def maxpool_implicit_gemm_forward(self): def maxpool_implicit_gemm_forward(self):
code = pccm.FunctionCode() code = pccm.FunctionCode()
if CUMM_CPU_ONLY_BUILD: if CUMM_CPU_ONLY_BUILD:
...@@ -773,7 +786,7 @@ class SpconvOps(pccm.Class): ...@@ -773,7 +786,7 @@ class SpconvOps(pccm.Class):
return code return code
@pccm.pybind.mark @pccm.pybind.mark
@pccm.cuda.static_function @_STATIC_FUNCTION
def maxpool_implicit_gemm_backward(self): def maxpool_implicit_gemm_backward(self):
code = pccm.FunctionCode() code = pccm.FunctionCode()
if CUMM_CPU_ONLY_BUILD: if CUMM_CPU_ONLY_BUILD:
...@@ -791,7 +804,7 @@ class SpconvOps(pccm.Class): ...@@ -791,7 +804,7 @@ class SpconvOps(pccm.Class):
return code return code
@pccm.pybind.mark @pccm.pybind.mark
@pccm.cuda.static_function @_STATIC_FUNCTION
def avgpool_implicit_gemm_forward(self): def avgpool_implicit_gemm_forward(self):
code = pccm.FunctionCode() code = pccm.FunctionCode()
if CUMM_CPU_ONLY_BUILD: if CUMM_CPU_ONLY_BUILD:
...@@ -808,7 +821,7 @@ class SpconvOps(pccm.Class): ...@@ -808,7 +821,7 @@ class SpconvOps(pccm.Class):
return code return code
@pccm.pybind.mark @pccm.pybind.mark
@pccm.cuda.static_function @_STATIC_FUNCTION
def avgpool_implicit_gemm_backward(self): def avgpool_implicit_gemm_backward(self):
code = pccm.FunctionCode() code = pccm.FunctionCode()
if CUMM_CPU_ONLY_BUILD: if CUMM_CPU_ONLY_BUILD:
...@@ -936,19 +949,19 @@ class SpconvOps(pccm.Class): ...@@ -936,19 +949,19 @@ class SpconvOps(pccm.Class):
@pccm.pybind.mark @pccm.pybind.mark
@pccm.cuda.static_function @_STATIC_FUNCTION
def sort_1d_by_key_allocator(self): def sort_1d_by_key_allocator(self):
# for python # for python
return self.sort_1d_by_key_allocator_template(False) return self.sort_1d_by_key_allocator_template(False)
@pccm.cuda.static_function @_STATIC_FUNCTION
def sort_1d_by_key_allocator_v2(self): def sort_1d_by_key_allocator_v2(self):
# for cpp only # for cpp only
return self.sort_1d_by_key_allocator_template(True) return self.sort_1d_by_key_allocator_template(True)
@pccm.pybind.mark @pccm.pybind.mark
@pccm.cuda.static_function @_STATIC_FUNCTION
def sort_1d_by_key_split(self): def sort_1d_by_key_split(self):
code = pccm.FunctionCode() code = pccm.FunctionCode()
if CUMM_CPU_ONLY_BUILD: if CUMM_CPU_ONLY_BUILD:
...@@ -1070,17 +1083,17 @@ class SpconvOps(pccm.Class): ...@@ -1070,17 +1083,17 @@ class SpconvOps(pccm.Class):
@pccm.pybind.mark @pccm.pybind.mark
@pccm.cuda.static_function @_STATIC_FUNCTION
def sort_1d_by_key_split_allocator(self): def sort_1d_by_key_split_allocator(self):
return self.sort_1d_by_key_split_allocator_template(False) return self.sort_1d_by_key_split_allocator_template(False)
@pccm.pybind.mark @pccm.pybind.mark
@pccm.cuda.static_function @_STATIC_FUNCTION
def sort_1d_by_key_split_allocator_v2(self): def sort_1d_by_key_split_allocator_v2(self):
return self.sort_1d_by_key_split_allocator_template(True) return self.sort_1d_by_key_split_allocator_template(True)
@pccm.pybind.mark @pccm.pybind.mark
@pccm.cuda.static_function @_STATIC_FUNCTION
def count_bits(self): def count_bits(self):
code = pccm.FunctionCode() code = pccm.FunctionCode()
if CUMM_CPU_ONLY_BUILD: if CUMM_CPU_ONLY_BUILD:
...@@ -1140,7 +1153,7 @@ class SpconvOps(pccm.Class): ...@@ -1140,7 +1153,7 @@ class SpconvOps(pccm.Class):
return code.ret("tv::Tensor") return code.ret("tv::Tensor")
@pccm.pybind.mark @pccm.pybind.mark
@pccm.cuda.static_function @_STATIC_FUNCTION
def reverse_bits(self): def reverse_bits(self):
code = pccm.FunctionCode() code = pccm.FunctionCode()
if CUMM_CPU_ONLY_BUILD: if CUMM_CPU_ONLY_BUILD:
...@@ -1202,7 +1215,7 @@ class SpconvOps(pccm.Class): ...@@ -1202,7 +1215,7 @@ class SpconvOps(pccm.Class):
# cpu only build can't use pccm.cuda # cpu only build can't use pccm.cuda
__CUDA_DECORATOR = pccm.static_function __CUDA_DECORATOR = pccm.static_function
if not CUMM_CPU_ONLY_BUILD: if not CUMM_CPU_ONLY_BUILD:
__CUDA_DECORATOR = pccm.cuda.static_function __CUDA_DECORATOR = _STATIC_FUNCTION
@pccm.pybind.mark @pccm.pybind.mark
@__CUDA_DECORATOR @__CUDA_DECORATOR
...@@ -1243,7 +1256,7 @@ class SpconvOps(pccm.Class): ...@@ -1243,7 +1256,7 @@ class SpconvOps(pccm.Class):
return code return code
@pccm.pybind.mark @pccm.pybind.mark
@pccm.cuda.static_function @_STATIC_FUNCTION
def sort_1d_by_key(self): def sort_1d_by_key(self):
code = pccm.FunctionCode() code = pccm.FunctionCode()
if CUMM_CPU_ONLY_BUILD: if CUMM_CPU_ONLY_BUILD:
...@@ -1475,13 +1488,16 @@ class SpconvOps(pccm.Class): ...@@ -1475,13 +1488,16 @@ class SpconvOps(pccm.Class):
""") """)
return code.ret("std::vector<int>") return code.ret("std::vector<int>")
@pccm.cuda.static_function @_STATIC_FUNCTION
def apply_thrust_unique_to_indice_pairs_uniq(self): def apply_thrust_unique_to_indice_pairs_uniq(self):
code = pccm.code() code = pccm.code()
code.add_dependency(CustomThrustLib)
code.arg("data", "tv::Tensor") code.arg("data", "tv::Tensor")
code.arg("allocator", "ThrustAllocator&") code.arg("allocator", "ThrustAllocator&")
code.arg("stream_int", f"std::uintptr_t", "0") code.arg("stream_int", f"std::uintptr_t", "0")
if CUMM_CPU_ONLY_BUILD:
return code.make_invalid()
code.add_dependency(CustomThrustLib)
code.raw(f""" code.raw(f"""
int num_out_act = 0; int num_out_act = 0;
int uniq_size = data.dim(0); int uniq_size = data.dim(0);
...@@ -1622,7 +1638,7 @@ class SpconvOps(pccm.Class): ...@@ -1622,7 +1638,7 @@ class SpconvOps(pccm.Class):
if CUMM_CPU_ONLY_BUILD: if CUMM_CPU_ONLY_BUILD:
code.raw(f""" code.raw(f"""
throw std::runtime_error("this function can only be used with CUDA.") TV_THROW_RT_ERR("this function can only be used with CUDA.");
""") """)
return code.ret("std::tuple<tv::Tensor, int>") return code.ret("std::tuple<tv::Tensor, int>")
code.raw(f""" code.raw(f"""
......
...@@ -624,6 +624,9 @@ class GemmTunerSimple(pccm.ParameterizedClass): ...@@ -624,6 +624,9 @@ class GemmTunerSimple(pccm.ParameterizedClass):
code.arg("shuffle_type", "int") code.arg("shuffle_type", "int")
code.arg("a_inds_shape, b_inds_shape, c_inds_shape", code.arg("a_inds_shape, b_inds_shape, c_inds_shape",
"std::vector<int64_t>") "std::vector<int64_t>")
if CUMM_CPU_ONLY_BUILD:
code.raw(f"TV_THROW_RT_ERR(\"not implemented for cpu!!!\")")
return code.ret("std::tuple<int, int, int>")
code.raw(f""" code.raw(f"""
return GemmMain::extract_mnk(a_shape, b_shape, trans_a, return GemmMain::extract_mnk(a_shape, b_shape, trans_a,
trans_b, trans_c, trans_b, trans_c,
......
...@@ -148,7 +148,10 @@ class InferenceOpsKernel(pccm.ParameterizedClass): ...@@ -148,7 +148,10 @@ class InferenceOpsKernel(pccm.ParameterizedClass):
class InferenceOps(pccm.Class): class InferenceOps(pccm.Class):
def __init__(self): def __init__(self):
super().__init__() super().__init__()
self.add_dependency(TensorView, LaunchUtils) self.add_dependency(TensorView)
if not CUMM_CPU_ONLY_BUILD:
self.add_dependency(LaunchUtils)
self.kernel = InferenceOpsKernel() self.kernel = InferenceOpsKernel()
self.add_include("tensorview/gemm/core/constants.h") self.add_include("tensorview/gemm/core/constants.h")
self.add_static_const("kMaxGridYZDim", "int", "65535") self.add_static_const("kMaxGridYZDim", "int", "65535")
......
...@@ -30,14 +30,13 @@ from spconv.constants import SPCONV_CPP_INDICE_PAIRS, SPCONV_CPP_INDICE_PAIRS_IG ...@@ -30,14 +30,13 @@ from spconv.constants import SPCONV_CPP_INDICE_PAIRS, SPCONV_CPP_INDICE_PAIRS_IG
import spconv.core_cc as _ext import spconv.core_cc as _ext
from spconv.core_cc.csrc.sparse.convops.spops import ConvGemmOps from spconv.core_cc.csrc.sparse.convops.spops import ConvGemmOps
from spconv.core_cc.csrc.sparse.inference import InferenceOps from spconv.core_cc.csrc.sparse.inference import InferenceOps
from spconv.cppconstants import CPU_ONLY_BUILD
from spconv.utils import nullcontext from spconv.utils import nullcontext
if hasattr(_ext, "cumm"): if not CPU_ONLY_BUILD:
CPU_ONLY_BUILD = False
from spconv.algo import GEMM, CONV, GEMM_CPP, CONV_CPP from spconv.algo import GEMM, CONV, GEMM_CPP, CONV_CPP
else: else:
CPU_ONLY_BUILD = True
GEMM = None GEMM = None
CONV = None CONV = None
GEMM_CPP = None GEMM_CPP = None
...@@ -1175,7 +1174,6 @@ def indice_conv_backward(features: torch.Tensor, ...@@ -1175,7 +1174,6 @@ def indice_conv_backward(features: torch.Tensor,
return (din, dfilters.reshape(filters_shape)) return (din, dfilters.reshape(filters_shape))
maxnhot = max(indice_pair_num_cpu) maxnhot = max(indice_pair_num_cpu)
arch = get_arch()
filters_tv = torch_tensor_to_tv(filters) filters_tv = torch_tensor_to_tv(filters)
dfilters_tv = torch_tensor_to_tv(dfilters) dfilters_tv = torch_tensor_to_tv(dfilters)
...@@ -1224,7 +1222,7 @@ def indice_conv_backward(features: torch.Tensor, ...@@ -1224,7 +1222,7 @@ def indice_conv_backward(features: torch.Tensor,
torch.mm(out_buffer[:nhot], filters_KC, out=inp_buffer[:nhot]) torch.mm(out_buffer[:nhot], filters_KC, out=inp_buffer[:nhot])
SpconvOps.scatter_add_cpu(din_tv, inp_buffer_tv, inp_indices) SpconvOps.scatter_add_cpu(din_tv, inp_buffer_tv, inp_indices)
return (din, dfilters.reshape(filters_shape)) return (din, dfilters.reshape(filters_shape))
arch = get_arch()
profile_idx = kv_center profile_idx = kv_center
if subm or indice_pair_num_cpu[profile_idx] == 0: if subm or indice_pair_num_cpu[profile_idx] == 0:
profile_idx = kv_center - 1 profile_idx = kv_center - 1
......
...@@ -22,7 +22,7 @@ from cumm import tensorview as tv ...@@ -22,7 +22,7 @@ from cumm import tensorview as tv
from spconv.core import ConvAlgo from spconv.core import ConvAlgo
import spconv.pytorch as spconv import spconv.pytorch as spconv
from spconv.utils import Point2VoxelCPU3d, Point2VoxelGPU3d from spconv.utils import Point2VoxelCPU3d
# torch.backends.cudnn.enabled = False # torch.backends.cudnn.enabled = False
def waymo_data(batch_size=1, num_features=-1): def waymo_data(batch_size=1, num_features=-1):
...@@ -44,6 +44,8 @@ def waymo_data(batch_size=1, num_features=-1): ...@@ -44,6 +44,8 @@ def waymo_data(batch_size=1, num_features=-1):
return voxels, coors, gen.grid_size return voxels, coors, gen.grid_size
def waymo_data_large(batch_size=1): def waymo_data_large(batch_size=1):
from spconv.utils import Point2VoxelGPU3d
gen = Point2VoxelGPU3d([0.1, 0.1, 0.1], [-80, -80, -2, 80, 80, 6], 3, gen = Point2VoxelGPU3d([0.1, 0.1, 0.1], [-80, -80, -2, 80, 80, 6], 3,
1600000, 1) 1600000, 1)
# gen = VoxelGeneratorV2([0.1, 0.1, 0.1], [-80, -80, -2, 80, 80, 6], 1, # gen = VoxelGeneratorV2([0.1, 0.1, 0.1], [-80, -80, -2, 80, 80, 6], 1,
...@@ -395,7 +397,7 @@ def main(): ...@@ -395,7 +397,7 @@ def main():
# voxels, coors, spatial_shape = waymo_data(num_features=3) # voxels, coors, spatial_shape = waymo_data(num_features=3)
with open(Path(__file__).parent / "data" / "test_spconv.pkl", "rb") as f: with open(Path(__file__).parent / "data" / "test_spconv.pkl", "rb") as f:
(voxels, coors, spatial_shape) = pickle.load(f) (voxels, coors, spatial_shape) = pickle.load(f)
voxels, coors, spatial_shape = waymo_data_large() # voxels, coors, spatial_shape = waymo_data_large()
# breakpoint() # breakpoint()
print(spatial_shape) print(spatial_shape)
...@@ -478,11 +480,11 @@ def main(): ...@@ -478,11 +480,11 @@ def main():
# for i in range(10): # for i in range(10):
# out = net(voxels_th, coors_th, 1) # out = net(voxels_th, coors_th, 1)
# print("------------") # print("------------")
# torch.cuda.synchronize() # # torch.cuda.synchronize()
# t = time.time() # # t = time.time()
# out.features.backward(dout_t) # out.features.backward(dout_t)
# torch.cuda.synchronize() # # torch.cuda.synchronize()
# times.append(time.time() - t) # # times.append(time.time() - t)
# # # print((net.grid == -1).float().sum(), net.grid.numel()) # # # print((net.grid == -1).float().sum(), net.grid.numel())
# # # print("spconv time", time.time() - t) # # # print("spconv time", time.time() - t)
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment