Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
one
spconv
Commits
21bb00ae
Commit
21bb00ae
authored
Jul 27, 2022
by
Yan Yan
Browse files
still working on c++ only
parent
899008fa
Changes
25
Expand all
Hide whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
994 additions
and
313 deletions
+994
-313
example/tensorrt/README.md
example/tensorrt/README.md
+17
-0
setup.py
setup.py
+1
-1
spconv/build.py
spconv/build.py
+1
-2
spconv/constants.py
spconv/constants.py
+4
-2
spconv/core_cc/csrc/sparse/all/__init__.pyi
spconv/core_cc/csrc/sparse/all/__init__.pyi
+54
-10
spconv/core_cc/csrc/sparse/alloc.pyi
spconv/core_cc/csrc/sparse/alloc.pyi
+8
-8
spconv/csrc/sparse/all.py
spconv/csrc/sparse/all.py
+258
-36
spconv/csrc/sparse/alloc.py
spconv/csrc/sparse/alloc.py
+107
-29
spconv/csrc/sparse/convops.py
spconv/csrc/sparse/convops.py
+13
-14
spconv/csrc/sparse/indices.py
spconv/csrc/sparse/indices.py
+115
-67
spconv/csrc/sparse/maxpool.py
spconv/csrc/sparse/maxpool.py
+165
-0
spconv/csrc/sparse/pointops.py
spconv/csrc/sparse/pointops.py
+2
-2
spconv/gencode/__init__.py
spconv/gencode/__init__.py
+14
-0
spconv/gencode/__main__.py
spconv/gencode/__main__.py
+14
-5
spconv/pytorch/__init__.py
spconv/pytorch/__init__.py
+3
-1
spconv/pytorch/conv.py
spconv/pytorch/conv.py
+175
-122
spconv/pytorch/core.py
spconv/pytorch/core.py
+18
-8
spconv/pytorch/cppcore.py
spconv/pytorch/cppcore.py
+4
-4
spconv/pytorch/functional.py
spconv/pytorch/functional.py
+20
-0
spconv/pytorch/modules.py
spconv/pytorch/modules.py
+1
-2
No files found.
example/tensorrt/README.md
0 → 100644
View file @
21bb00ae
<!--
Copyright 2022 Yan Yan
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
TODO
\ No newline at end of file
setup.py
View file @
21bb00ae
...
...
@@ -175,7 +175,7 @@ if disable_jit is not None and disable_jit == "1":
std
=
"c++14"
else
:
std
=
"c++17"
if
CUMM_CPU_ONLY_BUILD
:
if
not
CUMM_CPU_ONLY_BUILD
:
gemmtuner
=
GemmTunerSimple
(
cu
)
gemmtuner
.
namespace
=
"csrc.sparse.convops.gemmops"
convtuner
=
ConvTunerSimple
(
convcu
)
...
...
spconv/build.py
View file @
21bb00ae
...
...
@@ -62,8 +62,7 @@ if project_is_installed(PACKAGE_NAME) and project_is_editable(
CompileInfo
(),
ExternalAllocator
(),
ExternalSpconvMatmul
(),
SimpleExternalSpconvMatmul
(),
SimpleExternalSpconvMatmul
(),
# for debug, won't be included in release
]
pccm
.
builder
.
build_pybind
(
cus
,
PACKAGE_ROOT
/
"core_cc"
,
...
...
spconv/constants.py
View file @
21bb00ae
...
...
@@ -64,7 +64,7 @@ SPCONV_DEBUG_CPP_ONLY = project_is_editable(PACKAGE_NAME)
class
AllocKeys
:
Pair
=
"Pair"
Pair
Bwd
=
"Pair
Bwd
"
IndiceNumPerLoc
=
"IndiceNumPerLoc"
PairMask
=
"PairMask"
MaskArgSort
=
"MaskArgSort"
...
...
@@ -102,4 +102,6 @@ SPCONV_DEBUG_WEIGHT = False
SPCONV_CPP_INDICE_PAIRS
=
True
SPCONV_CPP_INDICE_PAIRS_IGEMM
=
True
SPCONV_CPP_GEMM
=
True
\ No newline at end of file
SPCONV_CPP_GEMM
=
True
SPCONV_FX_TRACE_MODE
=
os
.
getenv
(
"SPCONV_FX_TRACE_MODE"
,
"0"
)
==
"1"
\ No newline at end of file
spconv/core_cc/csrc/sparse/all/__init__.pyi
View file @
21bb00ae
...
...
@@ -240,6 +240,28 @@ class SpconvOps:
"""
...
@staticmethod
def avgpool_implicit_gemm_forward(out: Tensor, inp: Tensor, inds: Tensor, count_out: Tensor, stream: int = 0) -> None:
"""
Args:
out:
inp:
inds:
count_out:
stream:
"""
...
@staticmethod
def avgpool_implicit_gemm_backward(dout: Tensor, dinp: Tensor, inds: Tensor, count_out: Tensor, stream: int = 0) -> None:
"""
Args:
dout:
dinp:
inds:
count_out:
stream:
"""
...
@staticmethod
def maxpool_forward_cpu(out: Tensor, inp: Tensor, out_inds: Tensor, in_inds: Tensor) -> None:
"""
Args:
...
...
@@ -280,15 +302,6 @@ class SpconvOps:
"""
...
@staticmethod
def sort_1d_by_key(data: Tensor, indices: Tensor = Tensor(), stream: int = 0) -> Tensor:
"""
Args:
data:
indices:
stream:
"""
...
@staticmethod
def sort_1d_by_key_allocator(data: Tensor, alloc_func, indices: Tensor = Tensor(), stream: int = 0) -> Tensor:
"""
Args:
...
...
@@ -348,6 +361,24 @@ class SpconvOps:
"""
...
@staticmethod
def maximum_value_int(data: Tensor, value: int, stream_int: int) -> None:
"""
Args:
data:
value:
stream_int:
"""
...
@staticmethod
def sort_1d_by_key(data: Tensor, indices: Tensor = Tensor(), stream: int = 0) -> Tensor:
"""
Args:
data:
indices:
stream:
"""
...
@staticmethod
def calc_point2voxel_meta_data(vsize_xyz: List[float], coors_range_xyz: List[float]) -> Tuple[List[float], List[int], List[int], List[float]]:
"""
Args:
...
...
@@ -407,6 +438,18 @@ class SpconvOps:
"""
...
@staticmethod
def get_indice_gen_tensors_from_workspace(workspace, kv: int, num_act_in: int, num_act_out_bound: int, subm: bool, use_int64_hash_k: bool) -> Dict[str, Tensor]:
"""
Args:
workspace:
kv:
num_act_in:
num_act_out_bound:
subm:
use_int64_hash_k:
"""
...
@staticmethod
def get_indice_pairs_implicit_gemm(allocator, indices: Tensor, batch_size: int, input_dims: List[int], algo: int, ksize: List[int], stride: List[int], padding: List[int], dilation: List[int], out_padding: List[int], subm: bool, transposed: bool, is_train: bool, stream_int: int = 0, num_out_act_bound: int = -1) -> Tuple[Tensor, int]:
"""
Args:
...
...
@@ -428,7 +471,7 @@ class SpconvOps:
"""
...
@staticmethod
def get_indice_pairs(allocator, indices: Tensor, batch_size: int, input_dims: List[int], algo: int, ksize: List[int], stride: List[int], padding: List[int], dilation: List[int], out_padding: List[int], subm: bool, transposed: bool, stream_int: int = 0, num_out_act_bound: int = -1) -> int:
def get_indice_pairs(allocator, indices: Tensor, batch_size: int, input_dims: List[int], algo: int, ksize: List[int], stride: List[int], padding: List[int], dilation: List[int], out_padding: List[int], subm: bool, transposed: bool, stream_int: int = 0, num_out_act_bound: int =
-1, num_input_act_bound: int =
-1) -> int:
"""
Args:
allocator:
...
...
@@ -445,5 +488,6 @@ class SpconvOps:
transposed:
stream_int:
num_out_act_bound:
num_input_act_bound:
"""
...
spconv/core_cc/csrc/sparse/alloc.pyi
View file @
21bb00ae
...
...
@@ -2,29 +2,29 @@ from typing import overload, Any, Callable, Dict, List, Optional, Set, Tuple, Ty
from pccm.stubs import EnumValue, EnumClassValue
from cumm.tensorview import Tensor
class ExternalAllocator:
def zeros(self, name: str, shape: List[int], dtype: int, device: int, is_temp_memory: bool = False
, stream: int = 0
) -> Tensor:
def zeros(self, name: str, shape: List[int], dtype: int, device: int,
stream: int = 0,
is_temp_memory: bool = False) -> Tensor:
"""
Args:
name:
shape:
dtype:
device:
is_temp_memory:
stream:
is_temp_memory:
"""
...
def empty(self, name: str, shape: List[int], dtype: int, device: int, is_temp_memory: bool = False
, stream: int = 0
) -> Tensor:
def empty(self, name: str, shape: List[int], dtype: int, device: int,
stream: int = 0,
is_temp_memory: bool = False) -> Tensor:
"""
Args:
name:
shape:
dtype:
device:
is_temp_memory:
stream:
is_temp_memory:
"""
...
def full_int(self, name: str, shape: List[int], value: int, dtype: int, device: int, is_temp_memory: bool = False
, stream: int = 0
) -> Tensor:
def full_int(self, name: str, shape: List[int], value: int, dtype: int, device: int,
stream: int = 0,
is_temp_memory: bool = False) -> Tensor:
"""
Args:
name:
...
...
@@ -32,11 +32,11 @@ class ExternalAllocator:
value:
dtype:
device:
is_temp_memory:
stream:
is_temp_memory:
"""
...
def full_float(self, name: str, shape: List[int], value: float, dtype: int, device: int, is_temp_memory: bool = False
, stream: int = 0
) -> Tensor:
def full_float(self, name: str, shape: List[int], value: float, dtype: int, device: int,
stream: int = 0,
is_temp_memory: bool = False) -> Tensor:
"""
Args:
name:
...
...
@@ -44,8 +44,8 @@ class ExternalAllocator:
value:
dtype:
device:
is_temp_memory:
stream:
is_temp_memory:
"""
...
def get_tensor_by_name(self, name: str) -> Tensor:
...
...
spconv/csrc/sparse/all.py
View file @
21bb00ae
This diff is collapsed.
Click to expand it.
spconv/csrc/sparse/alloc.py
View file @
21bb00ae
...
...
@@ -2,7 +2,8 @@ import pccm
from
cumm.common
import
TensorView
,
TensorViewCPU
,
TensorViewKernel
,
ThrustLib
from
spconv.constants
import
AllocKeys
from
cumm.constants
import
CUMM_CPU_ONLY_BUILD
from
.indices
import
CudaCommonKernel
class
ExternalAllocatorGuard
(
pccm
.
Class
):
def
__init__
(
self
):
super
().
__init__
()
...
...
@@ -53,8 +54,8 @@ class ExternalAllocator(pccm.Class):
code
.
arg
(
"shape"
,
"std::vector<int64_t>"
)
code
.
arg
(
"dtype"
,
"int"
)
code
.
arg
(
"device"
,
"int"
)
code
.
arg
(
"is_temp_memory"
,
"bool"
,
"false"
)
code
.
arg
(
"stream"
,
"std::uintptr_t"
,
"0"
)
code
.
arg
(
"is_temp_memory"
,
"bool"
,
"false"
)
return
code
.
ret
(
"tv::Tensor"
)
...
...
@@ -66,8 +67,8 @@ class ExternalAllocator(pccm.Class):
code
.
arg
(
"shape"
,
"std::vector<int64_t>"
)
code
.
arg
(
"dtype"
,
"int"
)
code
.
arg
(
"device"
,
"int"
)
code
.
arg
(
"is_temp_memory"
,
"bool"
,
"false"
)
code
.
arg
(
"stream"
,
"std::uintptr_t"
,
"0"
)
code
.
arg
(
"is_temp_memory"
,
"bool"
,
"false"
)
return
code
.
ret
(
"tv::Tensor"
)
...
...
@@ -80,8 +81,8 @@ class ExternalAllocator(pccm.Class):
code
.
arg
(
"value"
,
"int"
)
code
.
arg
(
"dtype"
,
"int"
)
code
.
arg
(
"device"
,
"int"
)
code
.
arg
(
"is_temp_memory"
,
"bool"
,
"false"
)
code
.
arg
(
"stream"
,
"std::uintptr_t"
,
"0"
)
code
.
arg
(
"is_temp_memory"
,
"bool"
,
"false"
)
return
code
.
ret
(
"tv::Tensor"
)
...
...
@@ -94,8 +95,9 @@ class ExternalAllocator(pccm.Class):
code
.
arg
(
"value"
,
"float"
)
code
.
arg
(
"dtype"
,
"int"
)
code
.
arg
(
"device"
,
"int"
)
code
.
arg
(
"is_temp_memory"
,
"bool"
,
"false"
)
code
.
arg
(
"stream"
,
"std::uintptr_t"
,
"0"
)
code
.
arg
(
"is_temp_memory"
,
"bool"
,
"false"
)
return
code
.
ret
(
"tv::Tensor"
)
@
pccm
.
pybind
.
mark
(
virtual
=
True
)
...
...
@@ -129,7 +131,7 @@ class ExternalAllocator(pccm.Class):
code
.
arg
(
"stream"
,
"std::uintptr_t"
,
"0"
)
code
.
raw
(
f
"""
// "" means temp memory
auto ten = zeros(name, shape, dtype, device,
true,
stream);
auto ten = zeros(name, shape, dtype, device, stream
, true
);
return std::make_
{
self
.
ptr_type
}
<ExternalAllocatorGuard>(ten, [this](tv::Tensor ten){{
this->free(ten);
}});
...
...
@@ -145,7 +147,7 @@ class ExternalAllocator(pccm.Class):
code
.
arg
(
"name"
,
"std::string"
,
"
\"\"
"
)
code
.
arg
(
"stream"
,
"std::uintptr_t"
,
"0"
)
code
.
raw
(
f
"""
auto ten = empty(name, shape, dtype, device,
true,
stream);
auto ten = empty(name, shape, dtype, device, stream
, true
);
return std::make_
{
self
.
ptr_type
}
<ExternalAllocatorGuard>(ten, [this](tv::Tensor ten){{
this->free(ten);
}});
...
...
@@ -162,7 +164,7 @@ class ExternalAllocator(pccm.Class):
code
.
arg
(
"name"
,
"std::string"
,
"
\"\"
"
)
code
.
arg
(
"stream"
,
"std::uintptr_t"
,
"0"
)
code
.
raw
(
f
"""
auto ten = full_int(name, shape, value, dtype, device,
true,
stream);
auto ten = full_int(name, shape, value, dtype, device, stream
, true
);
return std::make_
{
self
.
ptr_type
}
<ExternalAllocatorGuard>(ten, [this](tv::Tensor ten){{
this->free(ten);
}});
...
...
@@ -179,7 +181,7 @@ class ExternalAllocator(pccm.Class):
code
.
arg
(
"name"
,
"std::string"
,
"
\"\"
"
)
code
.
arg
(
"stream"
,
"std::uintptr_t"
,
"0"
)
code
.
raw
(
f
"""
auto ten = full_float(name, shape, value, dtype, device,
true,
stream);
auto ten = full_float(name, shape, value, dtype, device, stream
, true
);
return std::make_
{
self
.
ptr_type
}
<ExternalAllocatorGuard>(ten, [this](tv::Tensor t){{
this->free(t);
}});
...
...
@@ -222,8 +224,10 @@ class ThrustAllocator(pccm.Class):
"""
)
return
code
class
StaticAllocator
(
ExternalAllocator
):
"""a s
imple
allocator for tensorrt plugin.
"""a s
tatic
allocator for tensorrt plugin.
"""
def
__init__
(
self
):
super
().
__init__
()
...
...
@@ -232,6 +236,7 @@ class StaticAllocator(ExternalAllocator):
self
.
add_member
(
"repr_"
,
"std::string"
)
self
.
add_member
(
"thrust_tmp_tensor_"
,
"tv::Tensor"
)
self
.
grow
=
1.5
self
.
cuda_common_kernel
=
CudaCommonKernel
()
@
pccm
.
pybind
.
mark
@
pccm
.
constructor
...
...
@@ -242,7 +247,22 @@ class StaticAllocator(ExternalAllocator):
code
.
raw
(
f
"""
std::stringstream ss;
for (auto& p : tensor_dict){{
tv::ssprint(ss, p.first, p.second.shape(), tv::dtype_str(p.second.dtype()), "
\\
n");
tv::sstream_print(ss, p.first, p.second.shape(), tv::dtype_str(p.second.dtype()), "
\\
n");
}}
repr_ = ss.str();
"""
)
return
code
@
pccm
.
pybind
.
mark
@
pccm
.
member_function
def
set_new_tensor_dict
(
self
):
code
=
pccm
.
code
()
code
.
arg
(
"tensor_dict"
,
"std::unordered_map<std::string, tv::Tensor>"
)
code
.
raw
(
f
"""
tensor_dict_ = tensor_dict;
std::stringstream ss;
for (auto& p : tensor_dict){{
tv::sstream_print(ss, p.first, p.second.shape(), tv::dtype_str(p.second.dtype()), "
\\
n");
}}
repr_ = ss.str();
"""
)
...
...
@@ -255,12 +275,21 @@ class StaticAllocator(ExternalAllocator):
code
.
arg
(
"shape"
,
"std::vector<int64_t>"
)
code
.
arg
(
"dtype"
,
"int"
)
code
.
arg
(
"device"
,
"int"
)
code
.
arg
(
"is_temp_memory"
,
"bool"
,
"false"
)
code
.
raw
(
f
"""
auto res = get_tensor_by_name(name);
size_t total = std::accumulate(shape.begin(), shape.end(), 1, std::multiplies<int64_t>());
TV_ASSERT_RT_ERR(res.nbytes() >= total * tv::bit_size(tv::DType(dtype))
&& res.device() == device, "alloc failed", shape, res.shape());
return tv::from_blob(res.raw_data(), shape, dtype, device);
TV_ASSERT_RT_ERR(res.nbytes() >= total * tv::bit_size(tv::DType(dtype)) / 8
&& res.device() == device, "alloc failed, tensor size too small", shape, res.shape());
// if (is_temp_memory){{
// }}else{{
// // size must exactly match
// TV_ASSERT_RT_ERR(res.nbytes() == total * tv::bit_size(tv::DType(dtype)) / 8
// && res.device() == device, "alloc failed, named memory size must match", shape, res.shape());
// }}
return tv::from_blob(res.raw_data(), shape, tv::DType(dtype), device);
"""
)
return
code
.
ret
(
"tv::Tensor"
)
...
...
@@ -273,16 +302,22 @@ class StaticAllocator(ExternalAllocator):
code
.
arg
(
"shape"
,
"std::vector<int64_t>"
)
code
.
arg
(
"dtype"
,
"int"
)
code
.
arg
(
"device"
,
"int"
)
code
.
arg
(
"is_temp_memory"
,
"bool"
,
"false"
)
code
.
arg
(
"stream"
,
"std::uintptr_t"
,
"0"
)
code
.
arg
(
"is_temp_memory"
,
"bool"
,
"false"
)
code
.
raw
(
f
"""
auto tvctx = tv::Context();
tvctx.set_cuda_stream(reinterpret_cast<cudaStream_t>(stream));
auto blob = _get_raw_and_check(name, shape, dtype, device);
"""
)
if
not
CUMM_CPU_ONLY_BUILD
:
code
.
raw
(
f
"""
tvctx.set_cuda_stream(reinterpret_cast<cudaStream_t>(stream));
"""
)
code
.
raw
(
f
"""
auto blob = _get_raw_and_check(name, shape, dtype, device, is_temp_memory);
return blob.zero_(tvctx);
"""
)
return
code
.
ret
(
"tv::Tensor"
)
@
pccm
.
pybind
.
mark
@
pccm
.
member_function
(
virtual
=
True
)
def
empty
(
self
):
...
...
@@ -291,8 +326,8 @@ class StaticAllocator(ExternalAllocator):
code
.
arg
(
"shape"
,
"std::vector<int64_t>"
)
code
.
arg
(
"dtype"
,
"int"
)
code
.
arg
(
"device"
,
"int"
)
code
.
arg
(
"is_temp_memory"
,
"bool"
,
"false"
)
code
.
arg
(
"stream"
,
"std::uintptr_t"
,
"0"
)
code
.
arg
(
"is_temp_memory"
,
"bool"
,
"false"
)
code
.
raw
(
f
"""
if (name ==
{
pccm
.
literal
(
AllocKeys
.
ThrustTemp
)
}
){{
// thrust tmp shouldn't inside tensor_dict. use a simple method to allocate
...
...
@@ -300,23 +335,28 @@ class StaticAllocator(ExternalAllocator):
// so we can just use one tensor
tv::Tensor res = thrust_tmp_tensor_;
if (res.empty()){{
res = tv::empty(shape, dtype, device);
res = tv::empty(shape,
tv::DType(
dtype
)
, device);
thrust_tmp_tensor_ = res;
}}
if (shape[0] > thrust_tmp_tensor_.dim(0)){{
res = tv::empty({{int64_t(shape[0] *
{
self
.
grow
}
)}}, dtype, device);
res = tv::empty({{int64_t(shape[0] *
{
self
.
grow
}
)}},
tv::DType(
dtype
)
, device);
thrust_tmp_tensor_ = res;
}}
return res;
}}else{{
auto blob = _get_raw_and_check(name, shape, dtype, device);
auto blob = _get_raw_and_check(name, shape, dtype, device
, is_temp_memory
);
return blob;
}}
"""
)
return
code
.
ret
(
"tv::Tensor"
)
# cpu only build can't use pccm.cuda
__CUDA_DECORATOR
=
pccm
.
member_function
if
not
CUMM_CPU_ONLY_BUILD
:
__CUDA_DECORATOR
=
pccm
.
cuda
.
member_function
@
pccm
.
pybind
.
mark
@
pccm
.
member_function
(
virtual
=
True
)
@
__CUDA_DECORATOR
def
full_int
(
self
):
code
=
pccm
.
code
()
code
.
arg
(
"name"
,
"std::string"
)
...
...
@@ -324,17 +364,36 @@ class StaticAllocator(ExternalAllocator):
code
.
arg
(
"value"
,
"int"
)
code
.
arg
(
"dtype"
,
"int"
)
code
.
arg
(
"device"
,
"int"
)
code
.
arg
(
"is_temp_memory"
,
"bool"
,
"false"
)
code
.
arg
(
"stream"
,
"std::uintptr_t"
,
"0"
)
code
.
arg
(
"is_temp_memory"
,
"bool"
,
"false"
)
code
.
raw
(
f
"""
auto tvctx = tv::Context();
auto blob = _get_raw_and_check(name, shape, dtype, device);
return blob.fill_(tvctx, value);
auto blob = _get_raw_and_check(name, shape, dtype, device, is_temp_memory);
"""
)
if
not
CUMM_CPU_ONLY_BUILD
:
code
.
add_param_class
(
"cudakers"
,
self
.
cuda_common_kernel
)
code
.
raw
(
f
"""
tvctx.set_cuda_stream(reinterpret_cast<cudaStream_t>(stream));
using ints_t = std::tuple<int32_t, int16_t, int8_t, int64_t, uint32_t, uint64_t, uint16_t, uint8_t>;
tv::Dispatch<ints_t>()(blob.dtype(), [&](auto I){{
using T = TV_DECLTYPE(I);
tv::cuda::Launch lanucher_fill(blob.size(), reinterpret_cast<cudaStream_t>(stream));
lanucher_fill(cudakers::fill_kernel<T>, blob.data_ptr<T>(), value, blob.size());
}});
"""
)
else
:
code
.
raw
(
f
"""
blob.fill_(value);
"""
)
code
.
raw
(
f
"""
return blob;
"""
)
return
code
.
ret
(
"tv::Tensor"
)
@
pccm
.
pybind
.
mark
@
pccm
.
member_function
(
virtual
=
True
)
@
__CUDA_DECORATOR
def
full_float
(
self
):
code
=
pccm
.
code
()
code
.
arg
(
"name"
,
"std::string"
)
...
...
@@ -342,11 +401,29 @@ class StaticAllocator(ExternalAllocator):
code
.
arg
(
"value"
,
"float"
)
code
.
arg
(
"dtype"
,
"int"
)
code
.
arg
(
"device"
,
"int"
)
code
.
arg
(
"is_temp_memory"
,
"bool"
,
"false"
)
code
.
arg
(
"stream"
,
"std::uintptr_t"
,
"0"
)
code
.
arg
(
"is_temp_memory"
,
"bool"
,
"false"
)
code
.
raw
(
f
"""
auto tvctx = tv::Context();
auto blob = _get_raw_and_check(name, shape, dtype, device, is_temp_memory);
"""
)
if
not
CUMM_CPU_ONLY_BUILD
:
code
.
add_param_class
(
"cudakers"
,
self
.
cuda_common_kernel
)
code
.
raw
(
f
"""
tvctx.set_cuda_stream(reinterpret_cast<cudaStream_t>(stream));
using dtypes_t = std::tuple<float, double>;
tv::Dispatch<dtypes_t>()(blob.dtype(), [&](auto I){{
using T = TV_DECLTYPE(I);
tv::cuda::Launch lanucher_fill(blob.size(), reinterpret_cast<cudaStream_t>(stream));
lanucher_fill(cudakers::fill_kernel<T>, blob.data_ptr<T>(), value, blob.size());
}});
"""
)
else
:
code
.
raw
(
f
"""
blob.fill_(value);
"""
)
code
.
raw
(
f
"""
auto blob = _get_raw_and_check(name, shape, dtype, device);
return blob.fill_(tvctx, value);
return blob;
"""
)
return
code
.
ret
(
"tv::Tensor"
)
...
...
@@ -364,6 +441,7 @@ class StaticAllocator(ExternalAllocator):
@
pccm
.
pybind
.
mark
@
pccm
.
member_function
(
virtual
=
True
)
def
free
(
self
):
# nothing here because this is a static allocator
code
=
pccm
.
code
()
code
.
arg
(
"ten"
,
"tv::Tensor"
)
return
code
...
...
spconv/csrc/sparse/convops.py
View file @
21bb00ae
...
...
@@ -78,11 +78,9 @@ class ExternalSpconvMatmul(pccm.Class):
return
code
class
SimpleExternalSpconvMatmul
(
ExternalSpconvMatmul
):
"""a helper class to warp matmul operations
because we don't want to implement matmul
(link to cublas/mkl/pytorch) in python package.
"""implement gemm in cuda via cublasLt. (only support forward)
should be used with tensorrt plugin.
"""
def
__init__
(
self
):
super
().
__init__
()
self
.
add_dependency
(
TensorView
,
ExternalAllocator
)
...
...
@@ -311,7 +309,7 @@ class SimpleExternalSpconvMatmul(ExternalSpconvMatmul):
TV_THROW_RT_ERR("unsupported");
}}
check_cublas_status(cublasLtMatmul(
handle, operationDesc, alpha_storage, a.raw_data(), Adesc, b.raw_data(),
handle, operationDesc, alpha_storage, a.
const_
raw_data(), Adesc, b.
const_
raw_data(),
Bdesc, beta_storage, c.raw_data(), Cdesc, c.raw_data(), Cdesc,
&heuristicResult.algo, nullptr, 0, stream));
if (preference)
...
...
@@ -1417,11 +1415,12 @@ class ConvGemmOps(pccm.ParameterizedClass):
is_KC_not_CK, kv_center, out_channel);
}}else{{
out_features = allocator.zeros(
{
pccm
.
literal
(
AllocKeys
.
OutFeatures
)
}
,
{{num_activate_out, out_channel}}, features.dtype(), features.device());
{{num_activate_out, out_channel}}, features.dtype(), features.device()
, stream_int
);
}}
if (kv == 1 && subm){{
return;
}}
auto indice_pair_num_cpu = indice_pair_num.cpu();
auto indice_pair_num_cpu_ptr = indice_pair_num_cpu.data_ptr<int>();
int maxnhot = 0;
...
...
@@ -1618,7 +1617,7 @@ class ConvGemmOps(pccm.ParameterizedClass):
int kv_center = kv / 2;
tv::Tensor din;
auto dfilters = allocator.zeros(
{
pccm
.
literal
(
AllocKeys
.
DFilters
)
}
,
prev_filter_shape_vec, features.dtype(), features.device());
prev_filter_shape_vec, features.dtype(), features.device()
, stream_int
);
dfilters = dfilters.view(filters.shape());
if (subm){{
din = ext_mm.indice_conv_bwd_init_gemm(
{
pccm
.
literal
(
AllocKeys
.
Features
)
}
,
...
...
@@ -1628,7 +1627,7 @@ class ConvGemmOps(pccm.ParameterizedClass):
is_KC_not_CK, kv_center);
}}else{{
din = allocator.zeros(
{
pccm
.
literal
(
AllocKeys
.
DIn
)
}
,
features.shape_vector(), features.dtype(), features.device());
features.shape_vector(), features.dtype(), features.device()
, stream_int
);
}}
if (kv == 1 && subm){{
return;
...
...
@@ -1922,10 +1921,10 @@ class ConvGemmOps(pccm.ParameterizedClass):
tv::Tensor out_features;
if (is_subm){{
out_features = allocator.empty(
{
pccm
.
literal
(
AllocKeys
.
OutFeatures
)
}
,
{{num_activate_out, out_channel}}, features.dtype(), features.device());
{{num_activate_out, out_channel}}, features.dtype(), features.device()
, stream_int
);
}}else{{
out_features = allocator.zeros(
{
pccm
.
literal
(
AllocKeys
.
OutFeatures
)
}
,
{{num_activate_out, out_channel}}, features.dtype(), features.device());
{{num_activate_out, out_channel}}, features.dtype(), features.device()
, stream_int
);
}}
auto arch = get_compute_capability();
constexpr auto kForwardInt = static_cast<int>(tv::gemm::ConvOpType::kForward);
...
...
@@ -1966,7 +1965,7 @@ class ConvGemmOps(pccm.ParameterizedClass):
if (is_train){{
mask_output_fwd = allocator.empty(
{
pccm
.
literal
(
AllocKeys
.
MaskOutputFwd
)
}
,
{{num_split, tv::div_up(num_activate_out, mask_width)}},
tv::uint32, features.device());
tv::uint32, features.device()
, stream_int
);
for (int i = 0; i < num_split; ++i){{
mask_output_fwd_splits.push_back(mask_output_fwd[i]);
}}
...
...
@@ -2042,13 +2041,13 @@ class ConvGemmOps(pccm.ParameterizedClass):
tv::Tensor din;
if (is_subm){{
din = allocator.empty(
{
pccm
.
literal
(
AllocKeys
.
DIn
)
}
,
features.shape_vector(), features.dtype(), features.device());
features.shape_vector(), features.dtype(), features.device()
, stream_int
);
}}else{{
din = allocator.zeros(
{
pccm
.
literal
(
AllocKeys
.
DIn
)
}
,
features.shape_vector(), features.dtype(), features.device());
features.shape_vector(), features.dtype(), features.device()
, stream_int
);
}}
tv::Tensor dfilters = allocator.zeros(
{
pccm
.
literal
(
AllocKeys
.
DFilters
)
}
,
filters_shape_vec, filters.dtype(), filters.device());
filters_shape_vec, filters.dtype(), filters.device()
, stream_int
);
dfilters = dfilters.view(out_channel, -1, in_channel);
constexpr auto kForwardInt = static_cast<int>(tv::gemm::ConvOpType::kForward);
...
...
spconv/csrc/sparse/indices.py
View file @
21bb00ae
This diff is collapsed.
Click to expand it.
spconv/csrc/sparse/maxpool.py
View file @
21bb00ae
...
...
@@ -180,6 +180,85 @@ class IndiceMaxPool(pccm.Class):
"""
)
return
code
@pccm.cuda.cuda_global_function
def forward_avgpool_implicit_gemm_kernel(self):
    """Emit the CUDA kernel for the average-pool forward pass.

    For every output row ``i`` the generated kernel visits the ``RS`` entries
    ``indices[k * num_indices + i]``, treats ``-1`` as "no input here", sums
    the referenced rows of ``in_features`` and divides the sum by the number
    of valid entries (writing zero when there are none). The valid-entry
    count is also stored to ``count_out[i]`` when ``count_out`` is non-null.

    Returns:
        The ``pccm.FunctionCode`` describing the kernel, templated on the
        feature element type ``T``.
    """
    code = pccm.FunctionCode()
    code.targ("T")
    code.arg("out_features", "T*")
    code.arg("in_features", "const T*")
    code.arg("indices", "const int*")
    code.arg("count_out", "int*")
    code.arg("num_features", "int")
    code.arg("RS", "int")
    code.arg("num_indices", "int")
    code.raw(f"""
    for (int i : tv::KernelLoopY<int>(num_indices)) {{
        auto out_ptr = out_features + i * num_features;
        auto indices_ptr = indices + i;
        // first pass: count how many of the RS slots reference a real input
        int count = 0;
        for (int k = 0; k < RS; ++k){{
            count += int(indices_ptr[0] != -1);
            indices_ptr += num_indices;
        }}
        if (count_out != nullptr){{
            count_out[i] = count;
        }}
        for (int j : tv::KernelLoopX<int>(num_features)) {{
            // second pass: rewind to slot 0 of this output and accumulate
            indices_ptr = indices + i;
            int in_idx;
            T in, in_temp;
            in = T(0);
            for (int k = 0; k < RS; ++k){{
                in_idx = indices_ptr[0];
                bool valid = in_idx != -1;
                in_temp = valid ? in_features[in_idx * num_features + j] : T(0);
                in += in_temp;
                indices_ptr += num_indices;
            }}
            // guard against outputs with no contributing input
            out_ptr[j] = count > 0 ? in / T(count) : T(0);
        }}
    }}
    """)
    return code
@pccm.cuda.cuda_global_function
def backward_avgpool_implicit_gemm_kernel(self):
    """Emit the CUDA kernel for the average-pool backward pass.

    For every input row ``i`` the generated kernel visits the ``RS`` entries
    ``indices_bwd[k * num_indices + i]`` (``-1`` means "no output here") and
    accumulates the gradient of each referenced output row into
    ``din_features``.

    The forward kernel computes ``out = sum(in) / count``, so each output
    contributes ``dout / count`` to its inputs. The previous code multiplied
    by ``count`` instead, which scaled gradients by ``count ** 2`` relative
    to the correct value.

    Returns:
        The ``pccm.FunctionCode`` describing the kernel, templated on the
        feature element type ``T``.
    """
    code = pccm.FunctionCode()
    code.targ("T")
    code.arg("dout_features", "const T*")
    code.arg("din_features", "T*")
    code.arg("indices_bwd", "const int*")
    code.arg("count_out", "const int*")
    code.arg("num_features", "int")
    code.arg("RS", "int")
    code.arg("num_indices", "int")
    code.raw(f"""
    for (int i : tv::KernelLoopY<int>(num_indices)) {{
        auto din_ptr = din_features + i * num_features;
        for (int j : tv::KernelLoopX<int>(num_features)) {{
            auto indices_ptr = indices_bwd + i;
            T sum_val = T(0);
            for (int k = 0; k < RS; ++k){{
                int out_idx = indices_ptr[0];
                if (out_idx != -1){{
                    int count = count_out[out_idx];
                    // d(mean)/d(in) = 1 / count; skip empty outputs so we
                    // never divide by zero.
                    if (count > 0){{
                        sum_val += dout_features[out_idx * num_features + j] / T(count);
                    }}
                }}
                indices_ptr += num_indices;
            }}
            din_ptr[j] = sum_val;
        }}
    }}
    """)
    return code
@
pccm
.
cuda
.
static_function
def
forward
(
self
):
code
=
pccm
.
FunctionCode
()
...
...
@@ -348,6 +427,92 @@ class IndiceMaxPool(pccm.Class):
"""
)
return
code
@pccm.cuda.static_function
def forward_avgpool_implicit_gemm(self):
    """Emit the host-side launcher for the average-pool forward kernel.

    The generated function validates that ``inds`` has ``out.dim(0)`` columns
    and that ``in`` has the same channel count as ``out``, picks a 2-D thread
    block of at most 512 threads whose x extent is the largest of
    512/256/128/64/32/16 not exceeding the channel count (16 when the channel
    count is smaller than 16), and launches
    ``forward_avgpool_implicit_gemm_kernel`` on ``stream``.

    The launch geometry is the same as before; only the duplicated
    configuration code in the "found" and "not found" paths was merged.

    Returns:
        The ``pccm.FunctionCode`` describing the launcher.
    """
    code = pccm.FunctionCode()
    code.arg("out", "tv::Tensor")
    code.arg("in", "tv::Tensor")
    code.arg("inds", "tv::Tensor")
    code.arg("count_out", "tv::Tensor")
    code.arg("stream", "std::uintptr_t", "0")
    code.raw(f"""
    auto nhot = out.dim(0);
    tv::check_shape(inds, {{-1, nhot}});
    tv::check_shape(in, {{-1, out.dim(1)}});
    auto cudastream = reinterpret_cast<cudaStream_t>(stream);
    tv::dispatch<float, double, tv::half_t, tv::bfloat16_t>(out.dtype(), [&](auto I){{
        using T = TV_DECLTYPE(I);
        constexpr int MaxThreads = 512;
        // default tile width, used when out.dim(1) is below every candidate.
        int NumFeatures = 16;
        tv::dispatch_int_noexcept<512, 256, 128, 64, 32, 16>(out.dim(1), [](int my, int expect){{return my >= expect;}}, [&](auto V){{
            // runs for the first candidate with out.dim(1) >= candidate;
            // once a candidate matches, the remaining ones are skipped.
            NumFeatures = TV_DECLTYPE(V)::value;
        }});
        int Num0 = MaxThreads / NumFeatures;
        dim3 blocks(tv::div_up(out.dim(1), int64_t(NumFeatures)), tv::div_up(nhot, int64_t(Num0)));
        dim3 threads(NumFeatures, Num0);
        tv::cuda::Launch launcher(blocks, threads, cudastream);
        launcher(forward_avgpool_implicit_gemm_kernel<T>, out.data_ptr<T>(), in.data_ptr<const T>(),
            inds.data_ptr<const int>(), count_out.data_ptr<int>(), out.dim(1), inds.dim(0), inds.dim(1));
    }});
    """)
    return code
@pccm.cuda.static_function
def backward_avgpool_implicit_gemm(self):
    """Emit the host-side launcher for the average-pool backward kernel.

    The generated function requires a non-empty ``count_out``, validates that
    ``inds`` has ``din.dim(0)`` columns and that ``din`` has the same channel
    count as ``dout``, picks a 2-D thread block of at most 512 threads whose
    x extent is the largest of 512/256/128/64/32/16 not exceeding the channel
    count (16 when the channel count is smaller than 16), and launches
    ``backward_avgpool_implicit_gemm_kernel`` on ``stream``.

    The launch geometry is the same as before; the duplicated configuration
    code was merged and the unused local ``num_act_out`` was removed.

    Returns:
        The ``pccm.FunctionCode`` describing the launcher.
    """
    code = pccm.FunctionCode()
    code.arg("dout", "tv::Tensor")
    code.arg("din", "tv::Tensor")
    code.arg("inds", "tv::Tensor")
    code.arg("count_out", "tv::Tensor")
    code.arg("stream", "std::uintptr_t", "0")
    code.raw(f"""
    auto nhot = din.dim(0);
    TV_ASSERT_RT_ERR(!count_out.empty(), "count out must not empty")
    tv::check_shape(inds, {{-1, nhot}});
    tv::check_shape(din, {{-1, dout.dim(1)}});
    auto cudastream = reinterpret_cast<cudaStream_t>(stream);
    tv::dispatch<float, double, tv::half_t, tv::bfloat16_t>(dout.dtype(), [&](auto I){{
        using T = TV_DECLTYPE(I);
        constexpr int MaxThreads = 512;
        // default tile width, used when dout.dim(1) is below every candidate.
        int NumFeatures = 16;
        tv::dispatch_int_noexcept<512, 256, 128, 64, 32, 16>(dout.dim(1), [](int my, int expect){{return my >= expect;}}, [&](auto V){{
            // runs for the first candidate with dout.dim(1) >= candidate;
            // once a candidate matches, the remaining ones are skipped.
            NumFeatures = TV_DECLTYPE(V)::value;
        }});
        int Num0 = MaxThreads / NumFeatures;
        dim3 blocks(tv::div_up(dout.dim(1), int64_t(NumFeatures)), tv::div_up(nhot, int64_t(Num0)));
        dim3 threads(NumFeatures, Num0);
        tv::cuda::Launch launcher(blocks, threads, cudastream);
        launcher(backward_avgpool_implicit_gemm_kernel<T>,
            dout.data_ptr<const T>(), din.data_ptr<T>(),
            inds.data_ptr<const int>(), count_out.data_ptr<const int>(),
            dout.dim(1), inds.dim(0), inds.dim(1));
    }});
    """)
    return code
class
IndiceMaxPoolCPU
(
pccm
.
Class
):
def
__init__
(
self
):
...
...
spconv/csrc/sparse/pointops.py
View file @
21bb00ae
...
...
@@ -297,7 +297,7 @@ class Point2Voxel(pccm.ParameterizedClass, pccm.pybind.PybindClassMixin):
self
.
add_dependency
(
TensorView
)
self
.
p2v_c
=
Point2VoxelCommon
(
dtype
,
ndim
,
zyx
)
self
.
add_param_class
(
"p2v_c"
,
self
.
p2v_c
,
"Point2VoxelCommon"
)
layout
=
TensorGeneric
(
ndim
,
Tru
e
)
layout
=
TensorGeneric
(
ndim
,
Fals
e
)
self
.
add_param_class
(
"layout_ns"
,
layout
,
"Layout"
)
self
.
dtype
=
dtype
self
.
ndim
=
ndim
...
...
@@ -489,7 +489,7 @@ class Point2VoxelCPU(pccm.ParameterizedClass, pccm.pybind.PybindClassMixin):
def
__init__
(
self
,
dtype
:
dtypes
.
DType
,
ndim
:
int
,
zyx
:
bool
=
True
):
super
().
__init__
()
self
.
add_dependency
(
TensorView
)
layout
=
TensorGeneric
(
ndim
,
Tru
e
)
layout
=
TensorGeneric
(
ndim
,
Fals
e
)
self
.
add_param_class
(
"layout_ns"
,
layout
,
"Layout"
)
self
.
dtype
=
dtype
self
.
ndim
=
ndim
...
...
spconv/gencode/__init__.py
View file @
21bb00ae
# Copyright 2022 Yan Yan
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
spconv/gencode/__main__.py
View file @
21bb00ae
...
...
@@ -10,33 +10,41 @@ from spconv.core import (IMPLGEMM_SIMT_PARAMS, IMPLGEMM_TURING_PARAMS,
SHUFFLE_TURING_PARAMS
,
SHUFFLE_VOLTA_PARAMS
)
from
spconv.csrc.hash.core
import
HashTable
from
spconv.csrc.sparse.all
import
SpconvOps
from
spconv.csrc.sparse.alloc
import
ExternalAllocator
from
spconv.csrc.sparse.alloc
import
ExternalAllocator
,
StaticAllocator
from
spconv.csrc.sparse.convops
import
(
ConvGemmOps
,
ConvTunerSimple
,
ExternalSpconvMatmul
,
GemmTunerSimple
,
SimpleExternalSpconvMatmul
)
from
spconv.csrc.utils
import
BoxOps
from
cumm.gemm.algospec.core
import
(
GemmAlgo
,
ShuffleStrideType
)
from
cumm.conv.bases
import
ConvLayout
,
ConvLayoutType
,
ConvOpType
def
main
(
include
:
str
,
src
:
str
,
libname
:
str
=
"spconv"
,
prefix
:
str
=
"spconvlib"
):
prefix
:
str
=
"spconvlib"
,
inference_only
:
bool
=
False
):
all_shuffle
=
SHUFFLE_SIMT_PARAMS
+
SHUFFLE_VOLTA_PARAMS
+
SHUFFLE_TURING_PARAMS
all_shuffle
=
list
(
filter
(
lambda
x
:
not
x
.
is_nvrtc
,
all_shuffle
))
if
inference_only
:
all_shuffle
=
list
(
filter
(
lambda
x
:
x
.
shuffle_stride
!=
ShuffleStrideType
.
ShuffleAB
,
all_shuffle
))
cu
=
GemmMainUnitTest
(
all_shuffle
)
cu
.
namespace
=
"cumm.gemm.main"
all_imp
=
(
IMPLGEMM_SIMT_PARAMS
+
IMPLGEMM_VOLTA_PARAMS
+
IMPLGEMM_TURING_PARAMS
)
# all_imp = IMPLGEMM_SIMT_PARAMS
all_imp
=
list
(
filter
(
lambda
x
:
not
x
.
is_nvrtc
,
all_imp
))
if
inference_only
:
all_imp
=
list
(
filter
(
lambda
x
:
x
.
op_type
==
ConvOpType
.
kForward
,
all_imp
))
convcu
=
ConvMainUnitTest
(
all_imp
)
convcu
.
namespace
=
"cumm.conv.main"
gemmtuner
=
GemmTunerSimple
(
cu
)
gemmtuner
.
namespace
=
"csrc.sparse.convops.gemmops"
gemmtuner
.
namespace
=
"
spconv.
csrc.sparse.convops.gemmops"
convtuner
=
ConvTunerSimple
(
convcu
)
convtuner
.
namespace
=
"csrc.sparse.convops.convops"
convtuner
.
namespace
=
"
spconv.
csrc.sparse.convops.convops"
convops
=
ConvGemmOps
(
gemmtuner
,
convtuner
)
convops
.
namespace
=
"csrc.sparse.convops.spops"
convops
.
namespace
=
"
spconv.
csrc.sparse.convops.spops"
cus
=
[
cu
,
...
...
@@ -51,6 +59,7 @@ def main(include: str,
ExternalAllocator
(),
ExternalSpconvMatmul
(),
SimpleExternalSpconvMatmul
(),
StaticAllocator
(),
]
gen_cmake
(
libname
,
cus
,
include
,
src
,
namespace_prefix
=
prefix
)
...
...
spconv/pytorch/__init__.py
View file @
21bb00ae
...
...
@@ -17,7 +17,9 @@ from spconv.pytorch.modules import (SparseModule, SparseSequential,
assign_name_for_sparse_modules
)
from
spconv.pytorch.ops
import
ConvAlgo
from
spconv.pytorch.pool
import
(
SparseMaxPool1d
,
SparseMaxPool2d
,
SparseMaxPool3d
,
SparseMaxPool4d
)
SparseMaxPool3d
,
SparseMaxPool4d
,
SparseAvgPool1d
,
SparseAvgPool2d
,
SparseAvgPool3d
)
from
spconv.pytorch.tables
import
AddTable
,
ConcatTable
,
JoinTable
...
...
spconv/pytorch/conv.py
View file @
21bb00ae
This diff is collapsed.
Click to expand it.
spconv/pytorch/core.py
View file @
21bb00ae
...
...
@@ -12,13 +12,14 @@
# See the License for the specific language governing permissions and
# limitations under the License.
from
typing
import
List
,
Optional
,
Tuple
,
Union
,
Dict
from
typing
import
Any
,
List
,
Optional
,
Tuple
,
Union
,
Dict
import
numpy
as
np
import
torch
from
spconv.core
import
ConvAlgo
from
spconv.pytorch.constants
import
PYTORCH_VERSION
from
spconv.tools
import
CUDAKernelTimer
from
spconv.constants
import
SPCONV_FX_TRACE_MODE
if
PYTORCH_VERSION
>=
[
1
,
8
,
0
]:
try
:
...
...
@@ -59,7 +60,8 @@ class ThrustSortAllocator:
class
IndiceData
(
object
):
def
__init__
(
self
,
out_indices
,
indices
,
indice_pairs
,
indice_pair_num
,
spatial_shape
,
out_spatial_shape
,
is_subm
:
bool
,
algo
:
ConvAlgo
,
ksize
:
List
[
int
],
stride
:
List
[
int
],
dilation
:
List
[
int
],
padding
:
List
[
int
]):
ksize
:
List
[
int
],
stride
:
List
[
int
],
dilation
:
List
[
int
],
padding
:
List
[
int
],
voxel_num
:
Optional
[
Any
]
=
None
):
self
.
out_indices
=
out_indices
self
.
indices
=
indices
self
.
indice_pairs
=
indice_pairs
...
...
@@ -72,6 +74,8 @@ class IndiceData(object):
self
.
stride
=
stride
self
.
dilation
=
dilation
self
.
padding
=
padding
# voxel_num is only used in tensorrt conversion.
self
.
voxel_num
=
voxel_num
class
ImplicitGemmIndiceData
(
object
):
...
...
@@ -83,7 +87,9 @@ class ImplicitGemmIndiceData(object):
mask_argsort_bwd_splits
:
List
[
torch
.
Tensor
],
masks
:
List
[
np
.
ndarray
],
spatial_shape
,
out_spatial_shape
,
is_subm
:
bool
,
algo
:
ConvAlgo
,
ksize
:
List
[
int
],
stride
:
List
[
int
],
dilation
:
List
[
int
],
padding
:
List
[
int
]):
ksize
:
List
[
int
],
stride
:
List
[
int
],
dilation
:
List
[
int
],
padding
:
List
[
int
],
in_voxel_num
:
Optional
[
Any
]
=
None
,
out_voxel_num
:
Optional
[
Any
]
=
None
):
self
.
out_indices
=
out_indices
self
.
indices
=
indices
self
.
pair_fwd
=
pair_fwd
...
...
@@ -101,6 +107,9 @@ class ImplicitGemmIndiceData(object):
self
.
stride
=
stride
self
.
dilation
=
dilation
self
.
padding
=
padding
# in/out voxel_num is only used in tensorrt conversion.
self
.
in_voxel_num
=
in_voxel_num
self
.
out_voxel_num
=
out_voxel_num
def
scatter_nd
(
indices
,
updates
,
shape
):
...
...
@@ -147,11 +156,12 @@ class SparseConvTensor(metaclass=SpConvTensorMeta):
force_algo: force conv/pool layers to use this algo; should only be used for debugging.
"""
ndim
=
indices
.
shape
[
1
]
-
1
assert
features
.
ndim
==
2
assert
indices
.
ndim
==
2
assert
len
(
spatial_shape
)
==
ndim
,
"spatial shape must equal to ndim"
assert
indices
.
dtype
==
torch
.
int32
,
"only support int32"
assert
batch_size
>
0
if
not
SPCONV_FX_TRACE_MODE
:
assert
features
.
ndim
==
2
assert
indices
.
ndim
==
2
assert
len
(
spatial_shape
)
==
ndim
,
"spatial shape must equal to ndim"
assert
indices
.
dtype
==
torch
.
int32
,
"only support int32"
assert
batch_size
>
0
self
.
_features
=
features
self
.
indices
=
indices
self
.
spatial_shape
=
[
int
(
v
)
for
v
in
spatial_shape
]
...
...
spconv/pytorch/cppcore.py
View file @
21bb00ae
...
...
@@ -103,7 +103,7 @@ class TorchAllocator(ExternalAllocator):
self
.
allocated
:
Dict
[
Union
[
str
,
int
],
torch
.
Tensor
]
=
{}
def
zeros
(
self
,
name
:
str
,
shape
:
List
[
int
],
dtype
:
int
,
device
:
int
,
is_temp_memory
:
bool
=
False
,
stream
:
int
=
0
)
->
tv
.
Tensor
:
device
:
int
,
stream
:
int
=
0
,
is_temp_memory
:
bool
=
False
)
->
tv
.
Tensor
:
# TODO free memory by name if it's already freed by pointer.
# provide a name if you want to access it after c++ function exit.
torch_uint_workaround
=
dtype
in
_TORCH_UINT_WORKAROUNDS
...
...
@@ -126,7 +126,7 @@ class TorchAllocator(ExternalAllocator):
return
ten_tv
def
empty
(
self
,
name
:
str
,
shape
:
List
[
int
],
dtype
:
int
,
device
:
int
,
is_temp_memory
:
bool
=
False
,
stream
:
int
=
0
)
->
tv
.
Tensor
:
device
:
int
,
stream
:
int
=
0
,
is_temp_memory
:
bool
=
False
)
->
tv
.
Tensor
:
torch_uint_workaround
=
dtype
in
_TORCH_UINT_WORKAROUNDS
dtype_bkp
=
dtype
if
dtype
in
_TORCH_UINT_WORKAROUNDS
:
...
...
@@ -147,7 +147,7 @@ class TorchAllocator(ExternalAllocator):
return
ten_tv
def
full_int
(
self
,
name
:
str
,
shape
:
List
[
int
],
value
:
int
,
dtype
:
int
,
device
:
int
,
is_temp_memory
:
bool
=
False
,
stream
:
int
=
0
)
->
tv
.
Tensor
:
device
:
int
,
stream
:
int
=
0
,
is_temp_memory
:
bool
=
False
)
->
tv
.
Tensor
:
if
dtype
in
_TORCH_UINT_WORKAROUNDS
and
value
<
0
:
raise
NotImplementedError
(
"you can't use full for unsigned dtypes"
)
torch_uint_workaround
=
dtype
in
_TORCH_UINT_WORKAROUNDS
...
...
@@ -171,7 +171,7 @@ class TorchAllocator(ExternalAllocator):
return
ten_tv
def
full_float
(
self
,
name
:
str
,
shape
:
List
[
int
],
value
:
float
,
dtype
:
int
,
device
:
int
,
is_temp_memory
:
bool
=
False
,
stream
:
int
=
0
)
->
tv
.
Tensor
:
device
:
int
,
stream
:
int
=
0
,
is_temp_memory
:
bool
=
False
)
->
tv
.
Tensor
:
if
dtype
in
_TORCH_UINT_WORKAROUNDS
and
value
<
0
:
raise
NotImplementedError
(
"you can't use full for unsigned dtypes"
)
torch_uint_workaround
=
dtype
in
_TORCH_UINT_WORKAROUNDS
...
...
spconv/pytorch/functional.py
View file @
21bb00ae
...
...
@@ -361,6 +361,25 @@ class SparseMaxPoolImplicitGemmFunction(Function):
features
,
out
,
grad_output
,
indice_pairs_bwd
)
return
input_bp
,
None
,
None
,
None
class SparseAvgPoolImplicitGemmFunction(Function):
    """Autograd function for sparse average pooling on implicit-gemm indice pairs.

    The forward pass delegates to ``ops.indice_avgpool_implicit_gemm`` and the
    backward pass to ``ops.indice_avgpool_implicit_gemm_backward``. Only the
    ``features`` input receives a gradient; the remaining forward arguments
    are treated as non-differentiable.
    """

    @staticmethod
    @_TORCH_CUSTOM_FWD
    def forward(ctx, features: torch.Tensor, indice_pairs_fwd: torch.Tensor,
                indice_pairs_bwd: torch.Tensor, num_activate_out: int,
                calc_count):
        """Run the forward average pooling.

        Args:
            ctx: autograd context used to stash tensors for ``backward``.
            features: input features to pool.
            indice_pairs_fwd: indice pairs passed to the forward op.
            indice_pairs_bwd: indice pairs kept for the backward op.
            num_activate_out: forwarded to the op unchanged (presumably the
                number of active output locations -- confirm against ``ops``).
            calc_count: forwarded to the op unchanged (presumably toggles
                computation of the per-output count -- confirm against ``ops``).

        Returns:
            The pooled output produced by ``ops.indice_avgpool_implicit_gemm``.
        """
        out, count = ops.indice_avgpool_implicit_gemm(features,
                                                      indice_pairs_fwd,
                                                      num_activate_out,
                                                      calc_count)
        # backward() only reads the backward pairs and the count, so do not
        # save ``features``/``out``: saving them would keep both tensors alive
        # until the backward pass without any use.
        ctx.save_for_backward(indice_pairs_bwd, count)
        return out

    @staticmethod
    @once_differentiable
    @_TORCH_CUSTOM_BWD
    def backward(ctx, grad_output):
        """Propagate ``grad_output`` back to the input features.

        Returns:
            A 5-tuple matching the forward inputs: the feature gradient
            followed by ``None`` for the four non-differentiable arguments.
        """
        indice_pairs_bwd, count = ctx.saved_tensors
        input_bp = ops.indice_avgpool_implicit_gemm_backward(
            grad_output, indice_pairs_bwd, count)
        # One slot per forward argument (features, indice_pairs_fwd,
        # indice_pairs_bwd, num_activate_out, calc_count).
        return input_bp, None, None, None, None
indice_conv
=
SparseConvFunction
.
apply
implicit_gemm
=
SparseImplicitGemmFunction
.
apply
...
...
@@ -368,6 +387,7 @@ indice_inverse_conv = SparseInverseConvFunction.apply
indice_subm_conv
=
SubMConvFunction
.
apply
indice_maxpool
=
SparseMaxPoolFunction
.
apply
indice_maxpool_implicit_gemm
=
SparseMaxPoolImplicitGemmFunction
.
apply
indice_avgpool_implicit_gemm
=
SparseAvgPoolImplicitGemmFunction
.
apply
def
_indice_to_scalar
(
indices
:
torch
.
Tensor
,
shape
:
List
[
int
]):
...
...
spconv/pytorch/modules.py
View file @
21bb00ae
...
...
@@ -132,12 +132,11 @@ class SparseSequential(SparseModule):
if
isinstance
(
input
,
list
):
input
=
module
(
input
)
else
:
assert
isinstance
(
input
,
spconv
.
SparseConvTensor
)
#
assert isinstance(input, spconv.SparseConvTensor)
# self._sparity_dict[k] = input.sparity
input
=
module
(
input
)
else
:
if
isinstance
(
input
,
spconv
.
SparseConvTensor
):
print
(
input
.
features
.
shape
)
if
input
.
indices
.
shape
[
0
]
!=
0
:
input
=
input
.
replace_feature
(
module
(
input
.
features
))
else
:
...
...
Prev
1
2
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment