Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
one
spconv
Commits
21bb00ae
Commit
21bb00ae
authored
Jul 27, 2022
by
Yan Yan
Browse files
still working on c++ only
parent
899008fa
Changes
25
Expand all
Hide whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
994 additions
and
313 deletions
+994
-313
example/tensorrt/README.md
example/tensorrt/README.md
+17
-0
setup.py
setup.py
+1
-1
spconv/build.py
spconv/build.py
+1
-2
spconv/constants.py
spconv/constants.py
+4
-2
spconv/core_cc/csrc/sparse/all/__init__.pyi
spconv/core_cc/csrc/sparse/all/__init__.pyi
+54
-10
spconv/core_cc/csrc/sparse/alloc.pyi
spconv/core_cc/csrc/sparse/alloc.pyi
+8
-8
spconv/csrc/sparse/all.py
spconv/csrc/sparse/all.py
+258
-36
spconv/csrc/sparse/alloc.py
spconv/csrc/sparse/alloc.py
+107
-29
spconv/csrc/sparse/convops.py
spconv/csrc/sparse/convops.py
+13
-14
spconv/csrc/sparse/indices.py
spconv/csrc/sparse/indices.py
+115
-67
spconv/csrc/sparse/maxpool.py
spconv/csrc/sparse/maxpool.py
+165
-0
spconv/csrc/sparse/pointops.py
spconv/csrc/sparse/pointops.py
+2
-2
spconv/gencode/__init__.py
spconv/gencode/__init__.py
+14
-0
spconv/gencode/__main__.py
spconv/gencode/__main__.py
+14
-5
spconv/pytorch/__init__.py
spconv/pytorch/__init__.py
+3
-1
spconv/pytorch/conv.py
spconv/pytorch/conv.py
+175
-122
spconv/pytorch/core.py
spconv/pytorch/core.py
+18
-8
spconv/pytorch/cppcore.py
spconv/pytorch/cppcore.py
+4
-4
spconv/pytorch/functional.py
spconv/pytorch/functional.py
+20
-0
spconv/pytorch/modules.py
spconv/pytorch/modules.py
+1
-2
No files found.
example/tensorrt/README.md
0 → 100644
View file @
21bb00ae
<!--
Copyright 2022 Yan Yan
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
TODO
\ No newline at end of file
setup.py
View file @
21bb00ae
...
@@ -175,7 +175,7 @@ if disable_jit is not None and disable_jit == "1":
...
@@ -175,7 +175,7 @@ if disable_jit is not None and disable_jit == "1":
std
=
"c++14"
std
=
"c++14"
else
:
else
:
std
=
"c++17"
std
=
"c++17"
if
CUMM_CPU_ONLY_BUILD
:
if
not
CUMM_CPU_ONLY_BUILD
:
gemmtuner
=
GemmTunerSimple
(
cu
)
gemmtuner
=
GemmTunerSimple
(
cu
)
gemmtuner
.
namespace
=
"csrc.sparse.convops.gemmops"
gemmtuner
.
namespace
=
"csrc.sparse.convops.gemmops"
convtuner
=
ConvTunerSimple
(
convcu
)
convtuner
=
ConvTunerSimple
(
convcu
)
...
...
spconv/build.py
View file @
21bb00ae
...
@@ -62,8 +62,7 @@ if project_is_installed(PACKAGE_NAME) and project_is_editable(
...
@@ -62,8 +62,7 @@ if project_is_installed(PACKAGE_NAME) and project_is_editable(
CompileInfo
(),
CompileInfo
(),
ExternalAllocator
(),
ExternalAllocator
(),
ExternalSpconvMatmul
(),
ExternalSpconvMatmul
(),
SimpleExternalSpconvMatmul
(),
SimpleExternalSpconvMatmul
(),
# for debug, won't be included in release
]
]
pccm
.
builder
.
build_pybind
(
cus
,
pccm
.
builder
.
build_pybind
(
cus
,
PACKAGE_ROOT
/
"core_cc"
,
PACKAGE_ROOT
/
"core_cc"
,
...
...
spconv/constants.py
View file @
21bb00ae
...
@@ -64,7 +64,7 @@ SPCONV_DEBUG_CPP_ONLY = project_is_editable(PACKAGE_NAME)
...
@@ -64,7 +64,7 @@ SPCONV_DEBUG_CPP_ONLY = project_is_editable(PACKAGE_NAME)
class
AllocKeys
:
class
AllocKeys
:
Pair
=
"Pair"
Pair
Bwd
=
"Pair
Bwd
"
IndiceNumPerLoc
=
"IndiceNumPerLoc"
IndiceNumPerLoc
=
"IndiceNumPerLoc"
PairMask
=
"PairMask"
PairMask
=
"PairMask"
MaskArgSort
=
"MaskArgSort"
MaskArgSort
=
"MaskArgSort"
...
@@ -102,4 +102,6 @@ SPCONV_DEBUG_WEIGHT = False
...
@@ -102,4 +102,6 @@ SPCONV_DEBUG_WEIGHT = False
SPCONV_CPP_INDICE_PAIRS
=
True
SPCONV_CPP_INDICE_PAIRS
=
True
SPCONV_CPP_INDICE_PAIRS_IGEMM
=
True
SPCONV_CPP_INDICE_PAIRS_IGEMM
=
True
SPCONV_CPP_GEMM
=
True
SPCONV_CPP_GEMM
=
True
\ No newline at end of file
SPCONV_FX_TRACE_MODE
=
os
.
getenv
(
"SPCONV_FX_TRACE_MODE"
,
"0"
)
==
"1"
\ No newline at end of file
spconv/core_cc/csrc/sparse/all/__init__.pyi
View file @
21bb00ae
...
@@ -240,6 +240,28 @@ class SpconvOps:
...
@@ -240,6 +240,28 @@ class SpconvOps:
"""
"""
...
...
@staticmethod
@staticmethod
def avgpool_implicit_gemm_forward(out: Tensor, inp: Tensor, inds: Tensor, count_out: Tensor, stream: int = 0) -> None:
"""
Args:
out:
inp:
inds:
count_out:
stream:
"""
...
@staticmethod
def avgpool_implicit_gemm_backward(dout: Tensor, dinp: Tensor, inds: Tensor, count_out: Tensor, stream: int = 0) -> None:
"""
Args:
dout:
dinp:
inds:
count_out:
stream:
"""
...
@staticmethod
def maxpool_forward_cpu(out: Tensor, inp: Tensor, out_inds: Tensor, in_inds: Tensor) -> None:
def maxpool_forward_cpu(out: Tensor, inp: Tensor, out_inds: Tensor, in_inds: Tensor) -> None:
"""
"""
Args:
Args:
...
@@ -280,15 +302,6 @@ class SpconvOps:
...
@@ -280,15 +302,6 @@ class SpconvOps:
"""
"""
...
...
@staticmethod
@staticmethod
def sort_1d_by_key(data: Tensor, indices: Tensor = Tensor(), stream: int = 0) -> Tensor:
"""
Args:
data:
indices:
stream:
"""
...
@staticmethod
def sort_1d_by_key_allocator(data: Tensor, alloc_func, indices: Tensor = Tensor(), stream: int = 0) -> Tensor:
def sort_1d_by_key_allocator(data: Tensor, alloc_func, indices: Tensor = Tensor(), stream: int = 0) -> Tensor:
"""
"""
Args:
Args:
...
@@ -348,6 +361,24 @@ class SpconvOps:
...
@@ -348,6 +361,24 @@ class SpconvOps:
"""
"""
...
...
@staticmethod
@staticmethod
def maximum_value_int(data: Tensor, value: int, stream_int: int) -> None:
"""
Args:
data:
value:
stream_int:
"""
...
@staticmethod
def sort_1d_by_key(data: Tensor, indices: Tensor = Tensor(), stream: int = 0) -> Tensor:
"""
Args:
data:
indices:
stream:
"""
...
@staticmethod
def calc_point2voxel_meta_data(vsize_xyz: List[float], coors_range_xyz: List[float]) -> Tuple[List[float], List[int], List[int], List[float]]:
def calc_point2voxel_meta_data(vsize_xyz: List[float], coors_range_xyz: List[float]) -> Tuple[List[float], List[int], List[int], List[float]]:
"""
"""
Args:
Args:
...
@@ -407,6 +438,18 @@ class SpconvOps:
...
@@ -407,6 +438,18 @@ class SpconvOps:
"""
"""
...
...
@staticmethod
@staticmethod
def get_indice_gen_tensors_from_workspace(workspace, kv: int, num_act_in: int, num_act_out_bound: int, subm: bool, use_int64_hash_k: bool) -> Dict[str, Tensor]:
"""
Args:
workspace:
kv:
num_act_in:
num_act_out_bound:
subm:
use_int64_hash_k:
"""
...
@staticmethod
def get_indice_pairs_implicit_gemm(allocator, indices: Tensor, batch_size: int, input_dims: List[int], algo: int, ksize: List[int], stride: List[int], padding: List[int], dilation: List[int], out_padding: List[int], subm: bool, transposed: bool, is_train: bool, stream_int: int = 0, num_out_act_bound: int = -1) -> Tuple[Tensor, int]:
def get_indice_pairs_implicit_gemm(allocator, indices: Tensor, batch_size: int, input_dims: List[int], algo: int, ksize: List[int], stride: List[int], padding: List[int], dilation: List[int], out_padding: List[int], subm: bool, transposed: bool, is_train: bool, stream_int: int = 0, num_out_act_bound: int = -1) -> Tuple[Tensor, int]:
"""
"""
Args:
Args:
...
@@ -428,7 +471,7 @@ class SpconvOps:
...
@@ -428,7 +471,7 @@ class SpconvOps:
"""
"""
...
...
@staticmethod
@staticmethod
def get_indice_pairs(allocator, indices: Tensor, batch_size: int, input_dims: List[int], algo: int, ksize: List[int], stride: List[int], padding: List[int], dilation: List[int], out_padding: List[int], subm: bool, transposed: bool, stream_int: int = 0, num_out_act_bound: int = -1) -> int:
def get_indice_pairs(allocator, indices: Tensor, batch_size: int, input_dims: List[int], algo: int, ksize: List[int], stride: List[int], padding: List[int], dilation: List[int], out_padding: List[int], subm: bool, transposed: bool, stream_int: int = 0, num_out_act_bound: int =
-1, num_input_act_bound: int =
-1) -> int:
"""
"""
Args:
Args:
allocator:
allocator:
...
@@ -445,5 +488,6 @@ class SpconvOps:
...
@@ -445,5 +488,6 @@ class SpconvOps:
transposed:
transposed:
stream_int:
stream_int:
num_out_act_bound:
num_out_act_bound:
num_input_act_bound:
"""
"""
...
...
spconv/core_cc/csrc/sparse/alloc.pyi
View file @
21bb00ae
...
@@ -2,29 +2,29 @@ from typing import overload, Any, Callable, Dict, List, Optional, Set, Tuple, Ty
...
@@ -2,29 +2,29 @@ from typing import overload, Any, Callable, Dict, List, Optional, Set, Tuple, Ty
from pccm.stubs import EnumValue, EnumClassValue
from pccm.stubs import EnumValue, EnumClassValue
from cumm.tensorview import Tensor
from cumm.tensorview import Tensor
class ExternalAllocator:
class ExternalAllocator:
def zeros(self, name: str, shape: List[int], dtype: int, device: int, is_temp_memory: bool = False
, stream: int = 0
) -> Tensor:
def zeros(self, name: str, shape: List[int], dtype: int, device: int,
stream: int = 0,
is_temp_memory: bool = False) -> Tensor:
"""
"""
Args:
Args:
name:
name:
shape:
shape:
dtype:
dtype:
device:
device:
is_temp_memory:
stream:
stream:
is_temp_memory:
"""
"""
...
...
def empty(self, name: str, shape: List[int], dtype: int, device: int, is_temp_memory: bool = False
, stream: int = 0
) -> Tensor:
def empty(self, name: str, shape: List[int], dtype: int, device: int,
stream: int = 0,
is_temp_memory: bool = False) -> Tensor:
"""
"""
Args:
Args:
name:
name:
shape:
shape:
dtype:
dtype:
device:
device:
is_temp_memory:
stream:
stream:
is_temp_memory:
"""
"""
...
...
def full_int(self, name: str, shape: List[int], value: int, dtype: int, device: int, is_temp_memory: bool = False
, stream: int = 0
) -> Tensor:
def full_int(self, name: str, shape: List[int], value: int, dtype: int, device: int,
stream: int = 0,
is_temp_memory: bool = False) -> Tensor:
"""
"""
Args:
Args:
name:
name:
...
@@ -32,11 +32,11 @@ class ExternalAllocator:
...
@@ -32,11 +32,11 @@ class ExternalAllocator:
value:
value:
dtype:
dtype:
device:
device:
is_temp_memory:
stream:
stream:
is_temp_memory:
"""
"""
...
...
def full_float(self, name: str, shape: List[int], value: float, dtype: int, device: int, is_temp_memory: bool = False
, stream: int = 0
) -> Tensor:
def full_float(self, name: str, shape: List[int], value: float, dtype: int, device: int,
stream: int = 0,
is_temp_memory: bool = False) -> Tensor:
"""
"""
Args:
Args:
name:
name:
...
@@ -44,8 +44,8 @@ class ExternalAllocator:
...
@@ -44,8 +44,8 @@ class ExternalAllocator:
value:
value:
dtype:
dtype:
device:
device:
is_temp_memory:
stream:
stream:
is_temp_memory:
"""
"""
...
...
def get_tensor_by_name(self, name: str) -> Tensor:
def get_tensor_by_name(self, name: str) -> Tensor:
...
...
spconv/csrc/sparse/all.py
View file @
21bb00ae
This diff is collapsed.
Click to expand it.
spconv/csrc/sparse/alloc.py
View file @
21bb00ae
...
@@ -2,7 +2,8 @@ import pccm
...
@@ -2,7 +2,8 @@ import pccm
from
cumm.common
import
TensorView
,
TensorViewCPU
,
TensorViewKernel
,
ThrustLib
from
cumm.common
import
TensorView
,
TensorViewCPU
,
TensorViewKernel
,
ThrustLib
from
spconv.constants
import
AllocKeys
from
spconv.constants
import
AllocKeys
from
cumm.constants
import
CUMM_CPU_ONLY_BUILD
from
.indices
import
CudaCommonKernel
class
ExternalAllocatorGuard
(
pccm
.
Class
):
class
ExternalAllocatorGuard
(
pccm
.
Class
):
def
__init__
(
self
):
def
__init__
(
self
):
super
().
__init__
()
super
().
__init__
()
...
@@ -53,8 +54,8 @@ class ExternalAllocator(pccm.Class):
...
@@ -53,8 +54,8 @@ class ExternalAllocator(pccm.Class):
code
.
arg
(
"shape"
,
"std::vector<int64_t>"
)
code
.
arg
(
"shape"
,
"std::vector<int64_t>"
)
code
.
arg
(
"dtype"
,
"int"
)
code
.
arg
(
"dtype"
,
"int"
)
code
.
arg
(
"device"
,
"int"
)
code
.
arg
(
"device"
,
"int"
)
code
.
arg
(
"is_temp_memory"
,
"bool"
,
"false"
)
code
.
arg
(
"stream"
,
"std::uintptr_t"
,
"0"
)
code
.
arg
(
"stream"
,
"std::uintptr_t"
,
"0"
)
code
.
arg
(
"is_temp_memory"
,
"bool"
,
"false"
)
return
code
.
ret
(
"tv::Tensor"
)
return
code
.
ret
(
"tv::Tensor"
)
...
@@ -66,8 +67,8 @@ class ExternalAllocator(pccm.Class):
...
@@ -66,8 +67,8 @@ class ExternalAllocator(pccm.Class):
code
.
arg
(
"shape"
,
"std::vector<int64_t>"
)
code
.
arg
(
"shape"
,
"std::vector<int64_t>"
)
code
.
arg
(
"dtype"
,
"int"
)
code
.
arg
(
"dtype"
,
"int"
)
code
.
arg
(
"device"
,
"int"
)
code
.
arg
(
"device"
,
"int"
)
code
.
arg
(
"is_temp_memory"
,
"bool"
,
"false"
)
code
.
arg
(
"stream"
,
"std::uintptr_t"
,
"0"
)
code
.
arg
(
"stream"
,
"std::uintptr_t"
,
"0"
)
code
.
arg
(
"is_temp_memory"
,
"bool"
,
"false"
)
return
code
.
ret
(
"tv::Tensor"
)
return
code
.
ret
(
"tv::Tensor"
)
...
@@ -80,8 +81,8 @@ class ExternalAllocator(pccm.Class):
...
@@ -80,8 +81,8 @@ class ExternalAllocator(pccm.Class):
code
.
arg
(
"value"
,
"int"
)
code
.
arg
(
"value"
,
"int"
)
code
.
arg
(
"dtype"
,
"int"
)
code
.
arg
(
"dtype"
,
"int"
)
code
.
arg
(
"device"
,
"int"
)
code
.
arg
(
"device"
,
"int"
)
code
.
arg
(
"is_temp_memory"
,
"bool"
,
"false"
)
code
.
arg
(
"stream"
,
"std::uintptr_t"
,
"0"
)
code
.
arg
(
"stream"
,
"std::uintptr_t"
,
"0"
)
code
.
arg
(
"is_temp_memory"
,
"bool"
,
"false"
)
return
code
.
ret
(
"tv::Tensor"
)
return
code
.
ret
(
"tv::Tensor"
)
...
@@ -94,8 +95,9 @@ class ExternalAllocator(pccm.Class):
...
@@ -94,8 +95,9 @@ class ExternalAllocator(pccm.Class):
code
.
arg
(
"value"
,
"float"
)
code
.
arg
(
"value"
,
"float"
)
code
.
arg
(
"dtype"
,
"int"
)
code
.
arg
(
"dtype"
,
"int"
)
code
.
arg
(
"device"
,
"int"
)
code
.
arg
(
"device"
,
"int"
)
code
.
arg
(
"is_temp_memory"
,
"bool"
,
"false"
)
code
.
arg
(
"stream"
,
"std::uintptr_t"
,
"0"
)
code
.
arg
(
"stream"
,
"std::uintptr_t"
,
"0"
)
code
.
arg
(
"is_temp_memory"
,
"bool"
,
"false"
)
return
code
.
ret
(
"tv::Tensor"
)
return
code
.
ret
(
"tv::Tensor"
)
@
pccm
.
pybind
.
mark
(
virtual
=
True
)
@
pccm
.
pybind
.
mark
(
virtual
=
True
)
...
@@ -129,7 +131,7 @@ class ExternalAllocator(pccm.Class):
...
@@ -129,7 +131,7 @@ class ExternalAllocator(pccm.Class):
code
.
arg
(
"stream"
,
"std::uintptr_t"
,
"0"
)
code
.
arg
(
"stream"
,
"std::uintptr_t"
,
"0"
)
code
.
raw
(
f
"""
code
.
raw
(
f
"""
// "" means temp memory
// "" means temp memory
auto ten = zeros(name, shape, dtype, device,
true,
stream);
auto ten = zeros(name, shape, dtype, device, stream
, true
);
return std::make_
{
self
.
ptr_type
}
<ExternalAllocatorGuard>(ten, [this](tv::Tensor ten){{
return std::make_
{
self
.
ptr_type
}
<ExternalAllocatorGuard>(ten, [this](tv::Tensor ten){{
this->free(ten);
this->free(ten);
}});
}});
...
@@ -145,7 +147,7 @@ class ExternalAllocator(pccm.Class):
...
@@ -145,7 +147,7 @@ class ExternalAllocator(pccm.Class):
code
.
arg
(
"name"
,
"std::string"
,
"
\"\"
"
)
code
.
arg
(
"name"
,
"std::string"
,
"
\"\"
"
)
code
.
arg
(
"stream"
,
"std::uintptr_t"
,
"0"
)
code
.
arg
(
"stream"
,
"std::uintptr_t"
,
"0"
)
code
.
raw
(
f
"""
code
.
raw
(
f
"""
auto ten = empty(name, shape, dtype, device,
true,
stream);
auto ten = empty(name, shape, dtype, device, stream
, true
);
return std::make_
{
self
.
ptr_type
}
<ExternalAllocatorGuard>(ten, [this](tv::Tensor ten){{
return std::make_
{
self
.
ptr_type
}
<ExternalAllocatorGuard>(ten, [this](tv::Tensor ten){{
this->free(ten);
this->free(ten);
}});
}});
...
@@ -162,7 +164,7 @@ class ExternalAllocator(pccm.Class):
...
@@ -162,7 +164,7 @@ class ExternalAllocator(pccm.Class):
code
.
arg
(
"name"
,
"std::string"
,
"
\"\"
"
)
code
.
arg
(
"name"
,
"std::string"
,
"
\"\"
"
)
code
.
arg
(
"stream"
,
"std::uintptr_t"
,
"0"
)
code
.
arg
(
"stream"
,
"std::uintptr_t"
,
"0"
)
code
.
raw
(
f
"""
code
.
raw
(
f
"""
auto ten = full_int(name, shape, value, dtype, device,
true,
stream);
auto ten = full_int(name, shape, value, dtype, device, stream
, true
);
return std::make_
{
self
.
ptr_type
}
<ExternalAllocatorGuard>(ten, [this](tv::Tensor ten){{
return std::make_
{
self
.
ptr_type
}
<ExternalAllocatorGuard>(ten, [this](tv::Tensor ten){{
this->free(ten);
this->free(ten);
}});
}});
...
@@ -179,7 +181,7 @@ class ExternalAllocator(pccm.Class):
...
@@ -179,7 +181,7 @@ class ExternalAllocator(pccm.Class):
code
.
arg
(
"name"
,
"std::string"
,
"
\"\"
"
)
code
.
arg
(
"name"
,
"std::string"
,
"
\"\"
"
)
code
.
arg
(
"stream"
,
"std::uintptr_t"
,
"0"
)
code
.
arg
(
"stream"
,
"std::uintptr_t"
,
"0"
)
code
.
raw
(
f
"""
code
.
raw
(
f
"""
auto ten = full_float(name, shape, value, dtype, device,
true,
stream);
auto ten = full_float(name, shape, value, dtype, device, stream
, true
);
return std::make_
{
self
.
ptr_type
}
<ExternalAllocatorGuard>(ten, [this](tv::Tensor t){{
return std::make_
{
self
.
ptr_type
}
<ExternalAllocatorGuard>(ten, [this](tv::Tensor t){{
this->free(t);
this->free(t);
}});
}});
...
@@ -222,8 +224,10 @@ class ThrustAllocator(pccm.Class):
...
@@ -222,8 +224,10 @@ class ThrustAllocator(pccm.Class):
"""
)
"""
)
return
code
return
code
class
StaticAllocator
(
ExternalAllocator
):
class
StaticAllocator
(
ExternalAllocator
):
"""a s
imple
allocator for tensorrt plugin.
"""a s
tatic
allocator for tensorrt plugin.
"""
"""
def
__init__
(
self
):
def
__init__
(
self
):
super
().
__init__
()
super
().
__init__
()
...
@@ -232,6 +236,7 @@ class StaticAllocator(ExternalAllocator):
...
@@ -232,6 +236,7 @@ class StaticAllocator(ExternalAllocator):
self
.
add_member
(
"repr_"
,
"std::string"
)
self
.
add_member
(
"repr_"
,
"std::string"
)
self
.
add_member
(
"thrust_tmp_tensor_"
,
"tv::Tensor"
)
self
.
add_member
(
"thrust_tmp_tensor_"
,
"tv::Tensor"
)
self
.
grow
=
1.5
self
.
grow
=
1.5
self
.
cuda_common_kernel
=
CudaCommonKernel
()
@
pccm
.
pybind
.
mark
@
pccm
.
pybind
.
mark
@
pccm
.
constructor
@
pccm
.
constructor
...
@@ -242,7 +247,22 @@ class StaticAllocator(ExternalAllocator):
...
@@ -242,7 +247,22 @@ class StaticAllocator(ExternalAllocator):
code
.
raw
(
f
"""
code
.
raw
(
f
"""
std::stringstream ss;
std::stringstream ss;
for (auto& p : tensor_dict){{
for (auto& p : tensor_dict){{
tv::ssprint(ss, p.first, p.second.shape(), tv::dtype_str(p.second.dtype()), "
\\
n");
tv::sstream_print(ss, p.first, p.second.shape(), tv::dtype_str(p.second.dtype()), "
\\
n");
}}
repr_ = ss.str();
"""
)
return
code
@
pccm
.
pybind
.
mark
@
pccm
.
member_function
def
set_new_tensor_dict
(
self
):
code
=
pccm
.
code
()
code
.
arg
(
"tensor_dict"
,
"std::unordered_map<std::string, tv::Tensor>"
)
code
.
raw
(
f
"""
tensor_dict_ = tensor_dict;
std::stringstream ss;
for (auto& p : tensor_dict){{
tv::sstream_print(ss, p.first, p.second.shape(), tv::dtype_str(p.second.dtype()), "
\\
n");
}}
}}
repr_ = ss.str();
repr_ = ss.str();
"""
)
"""
)
...
@@ -255,12 +275,21 @@ class StaticAllocator(ExternalAllocator):
...
@@ -255,12 +275,21 @@ class StaticAllocator(ExternalAllocator):
code
.
arg
(
"shape"
,
"std::vector<int64_t>"
)
code
.
arg
(
"shape"
,
"std::vector<int64_t>"
)
code
.
arg
(
"dtype"
,
"int"
)
code
.
arg
(
"dtype"
,
"int"
)
code
.
arg
(
"device"
,
"int"
)
code
.
arg
(
"device"
,
"int"
)
code
.
arg
(
"is_temp_memory"
,
"bool"
,
"false"
)
code
.
raw
(
f
"""
code
.
raw
(
f
"""
auto res = get_tensor_by_name(name);
auto res = get_tensor_by_name(name);
size_t total = std::accumulate(shape.begin(), shape.end(), 1, std::multiplies<int64_t>());
size_t total = std::accumulate(shape.begin(), shape.end(), 1, std::multiplies<int64_t>());
TV_ASSERT_RT_ERR(res.nbytes() >= total * tv::bit_size(tv::DType(dtype))
TV_ASSERT_RT_ERR(res.nbytes() >= total * tv::bit_size(tv::DType(dtype)) / 8
&& res.device() == device, "alloc failed", shape, res.shape());
&& res.device() == device, "alloc failed, tensor size too small", shape, res.shape());
return tv::from_blob(res.raw_data(), shape, dtype, device);
// if (is_temp_memory){{
// }}else{{
// // size must exactly match
// TV_ASSERT_RT_ERR(res.nbytes() == total * tv::bit_size(tv::DType(dtype)) / 8
// && res.device() == device, "alloc failed, named memory size must match", shape, res.shape());
// }}
return tv::from_blob(res.raw_data(), shape, tv::DType(dtype), device);
"""
)
"""
)
return
code
.
ret
(
"tv::Tensor"
)
return
code
.
ret
(
"tv::Tensor"
)
...
@@ -273,16 +302,22 @@ class StaticAllocator(ExternalAllocator):
...
@@ -273,16 +302,22 @@ class StaticAllocator(ExternalAllocator):
code
.
arg
(
"shape"
,
"std::vector<int64_t>"
)
code
.
arg
(
"shape"
,
"std::vector<int64_t>"
)
code
.
arg
(
"dtype"
,
"int"
)
code
.
arg
(
"dtype"
,
"int"
)
code
.
arg
(
"device"
,
"int"
)
code
.
arg
(
"device"
,
"int"
)
code
.
arg
(
"is_temp_memory"
,
"bool"
,
"false"
)
code
.
arg
(
"stream"
,
"std::uintptr_t"
,
"0"
)
code
.
arg
(
"stream"
,
"std::uintptr_t"
,
"0"
)
code
.
arg
(
"is_temp_memory"
,
"bool"
,
"false"
)
code
.
raw
(
f
"""
code
.
raw
(
f
"""
auto tvctx = tv::Context();
auto tvctx = tv::Context();
tvctx.set_cuda_stream(reinterpret_cast<cudaStream_t>(stream));
"""
)
auto blob = _get_raw_and_check(name, shape, dtype, device);
if
not
CUMM_CPU_ONLY_BUILD
:
code
.
raw
(
f
"""
tvctx.set_cuda_stream(reinterpret_cast<cudaStream_t>(stream));
"""
)
code
.
raw
(
f
"""
auto blob = _get_raw_and_check(name, shape, dtype, device, is_temp_memory);
return blob.zero_(tvctx);
return blob.zero_(tvctx);
"""
)
"""
)
return
code
.
ret
(
"tv::Tensor"
)
return
code
.
ret
(
"tv::Tensor"
)
@
pccm
.
pybind
.
mark
@
pccm
.
pybind
.
mark
@
pccm
.
member_function
(
virtual
=
True
)
@
pccm
.
member_function
(
virtual
=
True
)
def
empty
(
self
):
def
empty
(
self
):
...
@@ -291,8 +326,8 @@ class StaticAllocator(ExternalAllocator):
...
@@ -291,8 +326,8 @@ class StaticAllocator(ExternalAllocator):
code
.
arg
(
"shape"
,
"std::vector<int64_t>"
)
code
.
arg
(
"shape"
,
"std::vector<int64_t>"
)
code
.
arg
(
"dtype"
,
"int"
)
code
.
arg
(
"dtype"
,
"int"
)
code
.
arg
(
"device"
,
"int"
)
code
.
arg
(
"device"
,
"int"
)
code
.
arg
(
"is_temp_memory"
,
"bool"
,
"false"
)
code
.
arg
(
"stream"
,
"std::uintptr_t"
,
"0"
)
code
.
arg
(
"stream"
,
"std::uintptr_t"
,
"0"
)
code
.
arg
(
"is_temp_memory"
,
"bool"
,
"false"
)
code
.
raw
(
f
"""
code
.
raw
(
f
"""
if (name ==
{
pccm
.
literal
(
AllocKeys
.
ThrustTemp
)
}
){{
if (name ==
{
pccm
.
literal
(
AllocKeys
.
ThrustTemp
)
}
){{
// thrust tmp shouldn't inside tensor_dict. use a simple method to allocate
// thrust tmp shouldn't inside tensor_dict. use a simple method to allocate
...
@@ -300,23 +335,28 @@ class StaticAllocator(ExternalAllocator):
...
@@ -300,23 +335,28 @@ class StaticAllocator(ExternalAllocator):
// so we can just use one tensor
// so we can just use one tensor
tv::Tensor res = thrust_tmp_tensor_;
tv::Tensor res = thrust_tmp_tensor_;
if (res.empty()){{
if (res.empty()){{
res = tv::empty(shape, dtype, device);
res = tv::empty(shape,
tv::DType(
dtype
)
, device);
thrust_tmp_tensor_ = res;
thrust_tmp_tensor_ = res;
}}
}}
if (shape[0] > thrust_tmp_tensor_.dim(0)){{
if (shape[0] > thrust_tmp_tensor_.dim(0)){{
res = tv::empty({{int64_t(shape[0] *
{
self
.
grow
}
)}}, dtype, device);
res = tv::empty({{int64_t(shape[0] *
{
self
.
grow
}
)}},
tv::DType(
dtype
)
, device);
thrust_tmp_tensor_ = res;
thrust_tmp_tensor_ = res;
}}
}}
return res;
return res;
}}else{{
}}else{{
auto blob = _get_raw_and_check(name, shape, dtype, device);
auto blob = _get_raw_and_check(name, shape, dtype, device
, is_temp_memory
);
return blob;
return blob;
}}
}}
"""
)
"""
)
return
code
.
ret
(
"tv::Tensor"
)
return
code
.
ret
(
"tv::Tensor"
)
# cpu only build can't use pccm.cuda
__CUDA_DECORATOR
=
pccm
.
member_function
if
not
CUMM_CPU_ONLY_BUILD
:
__CUDA_DECORATOR
=
pccm
.
cuda
.
member_function
@
pccm
.
pybind
.
mark
@
pccm
.
pybind
.
mark
@
pccm
.
member_function
(
virtual
=
True
)
@
__CUDA_DECORATOR
def
full_int
(
self
):
def
full_int
(
self
):
code
=
pccm
.
code
()
code
=
pccm
.
code
()
code
.
arg
(
"name"
,
"std::string"
)
code
.
arg
(
"name"
,
"std::string"
)
...
@@ -324,17 +364,36 @@ class StaticAllocator(ExternalAllocator):
...
@@ -324,17 +364,36 @@ class StaticAllocator(ExternalAllocator):
code
.
arg
(
"value"
,
"int"
)
code
.
arg
(
"value"
,
"int"
)
code
.
arg
(
"dtype"
,
"int"
)
code
.
arg
(
"dtype"
,
"int"
)
code
.
arg
(
"device"
,
"int"
)
code
.
arg
(
"device"
,
"int"
)
code
.
arg
(
"is_temp_memory"
,
"bool"
,
"false"
)
code
.
arg
(
"stream"
,
"std::uintptr_t"
,
"0"
)
code
.
arg
(
"stream"
,
"std::uintptr_t"
,
"0"
)
code
.
arg
(
"is_temp_memory"
,
"bool"
,
"false"
)
code
.
raw
(
f
"""
code
.
raw
(
f
"""
auto tvctx = tv::Context();
auto tvctx = tv::Context();
auto blob = _get_raw_and_check(name, shape, dtype, device);
auto blob = _get_raw_and_check(name, shape, dtype, device, is_temp_memory);
return blob.fill_(tvctx, value);
"""
)
if
not
CUMM_CPU_ONLY_BUILD
:
code
.
add_param_class
(
"cudakers"
,
self
.
cuda_common_kernel
)
code
.
raw
(
f
"""
tvctx.set_cuda_stream(reinterpret_cast<cudaStream_t>(stream));
using ints_t = std::tuple<int32_t, int16_t, int8_t, int64_t, uint32_t, uint64_t, uint16_t, uint8_t>;
tv::Dispatch<ints_t>()(blob.dtype(), [&](auto I){{
using T = TV_DECLTYPE(I);
tv::cuda::Launch lanucher_fill(blob.size(), reinterpret_cast<cudaStream_t>(stream));
lanucher_fill(cudakers::fill_kernel<T>, blob.data_ptr<T>(), value, blob.size());
}});
"""
)
else
:
code
.
raw
(
f
"""
blob.fill_(value);
"""
)
code
.
raw
(
f
"""
return blob;
"""
)
"""
)
return
code
.
ret
(
"tv::Tensor"
)
return
code
.
ret
(
"tv::Tensor"
)
@
pccm
.
pybind
.
mark
@
pccm
.
pybind
.
mark
@
pccm
.
member_function
(
virtual
=
True
)
@
__CUDA_DECORATOR
def
full_float
(
self
):
def
full_float
(
self
):
code
=
pccm
.
code
()
code
=
pccm
.
code
()
code
.
arg
(
"name"
,
"std::string"
)
code
.
arg
(
"name"
,
"std::string"
)
...
@@ -342,11 +401,29 @@ class StaticAllocator(ExternalAllocator):
...
@@ -342,11 +401,29 @@ class StaticAllocator(ExternalAllocator):
code
.
arg
(
"value"
,
"float"
)
code
.
arg
(
"value"
,
"float"
)
code
.
arg
(
"dtype"
,
"int"
)
code
.
arg
(
"dtype"
,
"int"
)
code
.
arg
(
"device"
,
"int"
)
code
.
arg
(
"device"
,
"int"
)
code
.
arg
(
"is_temp_memory"
,
"bool"
,
"false"
)
code
.
arg
(
"stream"
,
"std::uintptr_t"
,
"0"
)
code
.
arg
(
"stream"
,
"std::uintptr_t"
,
"0"
)
code
.
arg
(
"is_temp_memory"
,
"bool"
,
"false"
)
code
.
raw
(
f
"""
auto tvctx = tv::Context();
auto blob = _get_raw_and_check(name, shape, dtype, device, is_temp_memory);
"""
)
if
not
CUMM_CPU_ONLY_BUILD
:
code
.
add_param_class
(
"cudakers"
,
self
.
cuda_common_kernel
)
code
.
raw
(
f
"""
tvctx.set_cuda_stream(reinterpret_cast<cudaStream_t>(stream));
using dtypes_t = std::tuple<float, double>;
tv::Dispatch<dtypes_t>()(blob.dtype(), [&](auto I){{
using T = TV_DECLTYPE(I);
tv::cuda::Launch lanucher_fill(blob.size(), reinterpret_cast<cudaStream_t>(stream));
lanucher_fill(cudakers::fill_kernel<T>, blob.data_ptr<T>(), value, blob.size());
}});
"""
)
else
:
code
.
raw
(
f
"""
blob.fill_(value);
"""
)
code
.
raw
(
f
"""
code
.
raw
(
f
"""
auto blob = _get_raw_and_check(name, shape, dtype, device);
return blob;
return blob.fill_(tvctx, value);
"""
)
"""
)
return
code
.
ret
(
"tv::Tensor"
)
return
code
.
ret
(
"tv::Tensor"
)
...
@@ -364,6 +441,7 @@ class StaticAllocator(ExternalAllocator):
...
@@ -364,6 +441,7 @@ class StaticAllocator(ExternalAllocator):
@
pccm
.
pybind
.
mark
@
pccm
.
pybind
.
mark
@
pccm
.
member_function
(
virtual
=
True
)
@
pccm
.
member_function
(
virtual
=
True
)
def
free
(
self
):
def
free
(
self
):
# nothing here because this is a static allocator
code
=
pccm
.
code
()
code
=
pccm
.
code
()
code
.
arg
(
"ten"
,
"tv::Tensor"
)
code
.
arg
(
"ten"
,
"tv::Tensor"
)
return
code
return
code
...
...
spconv/csrc/sparse/convops.py
View file @
21bb00ae
...
@@ -78,11 +78,9 @@ class ExternalSpconvMatmul(pccm.Class):
...
@@ -78,11 +78,9 @@ class ExternalSpconvMatmul(pccm.Class):
return
code
return
code
class
SimpleExternalSpconvMatmul
(
ExternalSpconvMatmul
):
class
SimpleExternalSpconvMatmul
(
ExternalSpconvMatmul
):
"""a helper class to warp matmul operations
"""implement gemm in cuda via cublasLt. (only support forward)
because we don't want to implement matmul
should be used with tensorrt plugin.
(link to cublas/mkl/pytorch) in python package.
"""
"""
def
__init__
(
self
):
def
__init__
(
self
):
super
().
__init__
()
super
().
__init__
()
self
.
add_dependency
(
TensorView
,
ExternalAllocator
)
self
.
add_dependency
(
TensorView
,
ExternalAllocator
)
...
@@ -311,7 +309,7 @@ class SimpleExternalSpconvMatmul(ExternalSpconvMatmul):
...
@@ -311,7 +309,7 @@ class SimpleExternalSpconvMatmul(ExternalSpconvMatmul):
TV_THROW_RT_ERR("unsupported");
TV_THROW_RT_ERR("unsupported");
}}
}}
check_cublas_status(cublasLtMatmul(
check_cublas_status(cublasLtMatmul(
handle, operationDesc, alpha_storage, a.raw_data(), Adesc, b.raw_data(),
handle, operationDesc, alpha_storage, a.
const_
raw_data(), Adesc, b.
const_
raw_data(),
Bdesc, beta_storage, c.raw_data(), Cdesc, c.raw_data(), Cdesc,
Bdesc, beta_storage, c.raw_data(), Cdesc, c.raw_data(), Cdesc,
&heuristicResult.algo, nullptr, 0, stream));
&heuristicResult.algo, nullptr, 0, stream));
if (preference)
if (preference)
...
@@ -1417,11 +1415,12 @@ class ConvGemmOps(pccm.ParameterizedClass):
...
@@ -1417,11 +1415,12 @@ class ConvGemmOps(pccm.ParameterizedClass):
is_KC_not_CK, kv_center, out_channel);
is_KC_not_CK, kv_center, out_channel);
}}else{{
}}else{{
out_features = allocator.zeros(
{
pccm
.
literal
(
AllocKeys
.
OutFeatures
)
}
,
out_features = allocator.zeros(
{
pccm
.
literal
(
AllocKeys
.
OutFeatures
)
}
,
{{num_activate_out, out_channel}}, features.dtype(), features.device());
{{num_activate_out, out_channel}}, features.dtype(), features.device()
, stream_int
);
}}
}}
if (kv == 1 && subm){{
if (kv == 1 && subm){{
return;
return;
}}
}}
auto indice_pair_num_cpu = indice_pair_num.cpu();
auto indice_pair_num_cpu = indice_pair_num.cpu();
auto indice_pair_num_cpu_ptr = indice_pair_num_cpu.data_ptr<int>();
auto indice_pair_num_cpu_ptr = indice_pair_num_cpu.data_ptr<int>();
int maxnhot = 0;
int maxnhot = 0;
...
@@ -1618,7 +1617,7 @@ class ConvGemmOps(pccm.ParameterizedClass):
...
@@ -1618,7 +1617,7 @@ class ConvGemmOps(pccm.ParameterizedClass):
int kv_center = kv / 2;
int kv_center = kv / 2;
tv::Tensor din;
tv::Tensor din;
auto dfilters = allocator.zeros(
{
pccm
.
literal
(
AllocKeys
.
DFilters
)
}
,
auto dfilters = allocator.zeros(
{
pccm
.
literal
(
AllocKeys
.
DFilters
)
}
,
prev_filter_shape_vec, features.dtype(), features.device());
prev_filter_shape_vec, features.dtype(), features.device()
, stream_int
);
dfilters = dfilters.view(filters.shape());
dfilters = dfilters.view(filters.shape());
if (subm){{
if (subm){{
din = ext_mm.indice_conv_bwd_init_gemm(
{
pccm
.
literal
(
AllocKeys
.
Features
)
}
,
din = ext_mm.indice_conv_bwd_init_gemm(
{
pccm
.
literal
(
AllocKeys
.
Features
)
}
,
...
@@ -1628,7 +1627,7 @@ class ConvGemmOps(pccm.ParameterizedClass):
...
@@ -1628,7 +1627,7 @@ class ConvGemmOps(pccm.ParameterizedClass):
is_KC_not_CK, kv_center);
is_KC_not_CK, kv_center);
}}else{{
}}else{{
din = allocator.zeros(
{
pccm
.
literal
(
AllocKeys
.
DIn
)
}
,
din = allocator.zeros(
{
pccm
.
literal
(
AllocKeys
.
DIn
)
}
,
features.shape_vector(), features.dtype(), features.device());
features.shape_vector(), features.dtype(), features.device()
, stream_int
);
}}
}}
if (kv == 1 && subm){{
if (kv == 1 && subm){{
return;
return;
...
@@ -1922,10 +1921,10 @@ class ConvGemmOps(pccm.ParameterizedClass):
...
@@ -1922,10 +1921,10 @@ class ConvGemmOps(pccm.ParameterizedClass):
tv::Tensor out_features;
tv::Tensor out_features;
if (is_subm){{
if (is_subm){{
out_features = allocator.empty(
{
pccm
.
literal
(
AllocKeys
.
OutFeatures
)
}
,
out_features = allocator.empty(
{
pccm
.
literal
(
AllocKeys
.
OutFeatures
)
}
,
{{num_activate_out, out_channel}}, features.dtype(), features.device());
{{num_activate_out, out_channel}}, features.dtype(), features.device()
, stream_int
);
}}else{{
}}else{{
out_features = allocator.zeros(
{
pccm
.
literal
(
AllocKeys
.
OutFeatures
)
}
,
out_features = allocator.zeros(
{
pccm
.
literal
(
AllocKeys
.
OutFeatures
)
}
,
{{num_activate_out, out_channel}}, features.dtype(), features.device());
{{num_activate_out, out_channel}}, features.dtype(), features.device()
, stream_int
);
}}
}}
auto arch = get_compute_capability();
auto arch = get_compute_capability();
constexpr auto kForwardInt = static_cast<int>(tv::gemm::ConvOpType::kForward);
constexpr auto kForwardInt = static_cast<int>(tv::gemm::ConvOpType::kForward);
...
@@ -1966,7 +1965,7 @@ class ConvGemmOps(pccm.ParameterizedClass):
...
@@ -1966,7 +1965,7 @@ class ConvGemmOps(pccm.ParameterizedClass):
if (is_train){{
if (is_train){{
mask_output_fwd = allocator.empty(
{
pccm
.
literal
(
AllocKeys
.
MaskOutputFwd
)
}
,
mask_output_fwd = allocator.empty(
{
pccm
.
literal
(
AllocKeys
.
MaskOutputFwd
)
}
,
{{num_split, tv::div_up(num_activate_out, mask_width)}},
{{num_split, tv::div_up(num_activate_out, mask_width)}},
tv::uint32, features.device());
tv::uint32, features.device()
, stream_int
);
for (int i = 0; i < num_split; ++i){{
for (int i = 0; i < num_split; ++i){{
mask_output_fwd_splits.push_back(mask_output_fwd[i]);
mask_output_fwd_splits.push_back(mask_output_fwd[i]);
}}
}}
...
@@ -2042,13 +2041,13 @@ class ConvGemmOps(pccm.ParameterizedClass):
...
@@ -2042,13 +2041,13 @@ class ConvGemmOps(pccm.ParameterizedClass):
tv::Tensor din;
tv::Tensor din;
if (is_subm){{
if (is_subm){{
din = allocator.empty(
{
pccm
.
literal
(
AllocKeys
.
DIn
)
}
,
din = allocator.empty(
{
pccm
.
literal
(
AllocKeys
.
DIn
)
}
,
features.shape_vector(), features.dtype(), features.device());
features.shape_vector(), features.dtype(), features.device()
, stream_int
);
}}else{{
}}else{{
din = allocator.zeros(
{
pccm
.
literal
(
AllocKeys
.
DIn
)
}
,
din = allocator.zeros(
{
pccm
.
literal
(
AllocKeys
.
DIn
)
}
,
features.shape_vector(), features.dtype(), features.device());
features.shape_vector(), features.dtype(), features.device()
, stream_int
);
}}
}}
tv::Tensor dfilters = allocator.zeros(
{
pccm
.
literal
(
AllocKeys
.
DFilters
)
}
,
tv::Tensor dfilters = allocator.zeros(
{
pccm
.
literal
(
AllocKeys
.
DFilters
)
}
,
filters_shape_vec, filters.dtype(), filters.device());
filters_shape_vec, filters.dtype(), filters.device()
, stream_int
);
dfilters = dfilters.view(out_channel, -1, in_channel);
dfilters = dfilters.view(out_channel, -1, in_channel);
constexpr auto kForwardInt = static_cast<int>(tv::gemm::ConvOpType::kForward);
constexpr auto kForwardInt = static_cast<int>(tv::gemm::ConvOpType::kForward);
...
...
spconv/csrc/sparse/indices.py
View file @
21bb00ae
This diff is collapsed.
Click to expand it.
spconv/csrc/sparse/maxpool.py
View file @
21bb00ae
...
@@ -180,6 +180,85 @@ class IndiceMaxPool(pccm.Class):
...
@@ -180,6 +180,85 @@ class IndiceMaxPool(pccm.Class):
"""
)
"""
)
return
code
return
code
@pccm.cuda.cuda_global_function
def forward_avgpool_implicit_gemm_kernel(self):
    """Build the CUDA kernel source for the average-pool forward pass.

    The generated kernel is templated on the feature type ``T``. For every
    output row ``i`` (outer ``KernelLoopY`` over ``num_indices``) it:

    * walks the ``RS`` entries ``indices[k * num_indices + i]`` and counts
      those that are not ``-1``;
    * stores that count in ``count_out[i]`` when ``count_out`` is non-null;
    * for every feature column ``j`` (inner ``KernelLoopX``) sums
      ``in_features[in_idx * num_features + j]`` over the valid entries and
      writes ``sum / count`` to ``out_features[i * num_features + j]``
      (``0`` when no entry is valid).

    Returns:
        The populated ``pccm.FunctionCode`` describing the kernel.
    """
    kernel = pccm.FunctionCode()
    kernel.targ("T")
    # Kernel parameters, declared in the order the launcher passes them.
    for arg_name, arg_type in (
        ("out_features", "T*"),
        ("in_features", "const T*"),
        ("indices", "const int*"),
        ("count_out", "int*"),
        ("num_features", "int"),
        ("RS", "int"),
        ("num_indices", "int"),
    ):
        kernel.arg(arg_name, arg_type)
    kernel.raw(f"""
for (int i : tv::KernelLoopY<int>(num_indices)) {{
auto out_ptr = out_features + i * num_features;
auto indices_ptr = indices + i;
int in_idx = 0;
int count = 0;
for (int k = 0; k < RS; ++k){{
in_idx = indices_ptr[0];
count += int(in_idx != -1);
indices_ptr += num_indices;
}}
if (count_out != nullptr){{
count_out[i] = count;
}}
for (int j : tv::KernelLoopX<int>(num_features)) {{
indices_ptr = indices + i;
int in_idx;
T in, in_temp;
in = T(0);
for (int k = 0; k < RS; ++k){{
in_idx = indices_ptr[0];
bool valid = in_idx != -1;
in_temp = valid ? in_features[in_idx * num_features + j] : T(0);
in += in_temp;
indices_ptr += num_indices;
}}
out_ptr[j] = count > 0 ? in / T(count) : T(0);
}}
}}
""")
    return kernel
@pccm.cuda.cuda_global_function
def backward_avgpool_implicit_gemm_kernel(self):
    """Build the CUDA kernel source for the average-pool backward pass.

    The generated kernel is templated on the feature type ``T``. For every
    input row ``i`` (outer ``KernelLoopY`` over ``num_indices``) and feature
    column ``j`` (inner ``KernelLoopX`` over ``num_features``) it walks the
    ``RS`` entries ``indices_bwd[k * num_indices + i]``; each entry that is
    not ``-1`` names an output row ``out_idx`` this input contributed to.

    The forward kernel computes ``out = sum(in) / count``, so the gradient
    flowing back from each such output is ``dout / count_out[out_idx]``.
    The contributions are summed and written to
    ``din_features[i * num_features + j]``.

    Returns:
        The populated ``pccm.FunctionCode`` describing the kernel.
    """
    code = pccm.FunctionCode()
    code.targ("T")
    code.arg("dout_features", "const T*")
    code.arg("din_features", "T*")
    code.arg("indices_bwd", "const int*")
    code.arg("count_out", "const int*")
    code.arg("num_features", "int")
    code.arg("RS", "int")
    code.arg("num_indices", "int")
    # NOTE: the gradient of a mean is dout / count. The previous version
    # multiplied by count, which scaled gradients by count^2 relative to the
    # correct value. The count > 0 guard keeps invalid slots (and any
    # zero-count output) from producing a division by zero.
    code.raw(f"""
for (int i : tv::KernelLoopY<int>(num_indices)) {{
auto din_ptr = din_features + i * num_features;
for (int j : tv::KernelLoopX<int>(num_features)) {{
auto indices_ptr = indices_bwd + i;
int out_idx = 0;
T sum_val = T(0);
for (int k = 0; k < RS; ++k){{
out_idx = indices_ptr[0];
bool valid = out_idx != -1;
T dout = valid ? dout_features[out_idx * num_features + j] : T(0);
int count = valid ? count_out[out_idx] : 0;
sum_val += count > 0 ? dout / T(count) : T(0);
indices_ptr += num_indices;
}}
din_ptr[j] = sum_val;
}}
}}
""")
    return code
@
pccm
.
cuda
.
static_function
@
pccm
.
cuda
.
static_function
def
forward
(
self
):
def
forward
(
self
):
code
=
pccm
.
FunctionCode
()
code
=
pccm
.
FunctionCode
()
...
@@ -348,6 +427,92 @@ class IndiceMaxPool(pccm.Class):
...
@@ -348,6 +427,92 @@ class IndiceMaxPool(pccm.Class):
"""
)
"""
)
return
code
return
code
@pccm.cuda.static_function
def forward_avgpool_implicit_gemm(self):
    """Build the host-side launcher for the average-pool forward kernel.

    Generated function arguments: ``out``, ``in``, ``inds`` and
    ``count_out`` (all ``tv::Tensor``) plus ``stream`` (``std::uintptr_t``,
    default ``0``), which is reinterpreted as a ``cudaStream_t``.

    The generated code:

    * checks that ``inds`` has shape ``(-1, out.dim(0))`` and ``in`` has
      shape ``(-1, out.dim(1))``;
    * dispatches on ``out.dtype()`` over float, double, half and bfloat16;
    * picks the block x-size from ``{{512, 256, 128, 64, 32, 16}}`` using the
      predicate ``out.dim(1) >= candidate`` (falling back to 16 when none
      matches) and sets the block y-size to ``512 / x``;
    * launches ``forward_avgpool_implicit_gemm_kernel<T>`` with
      ``num_features = out.dim(1)``, ``RS = inds.dim(0)`` and
      ``num_indices = inds.dim(1)``.

    Returns:
        The populated ``pccm.FunctionCode`` describing the launcher.
    """
    launcher_code = pccm.FunctionCode()
    # (name, type[, default]) for each generated C++ parameter, in order.
    signature = (
        ("out", "tv::Tensor"),
        ("in", "tv::Tensor"),
        ("inds", "tv::Tensor"),
        ("count_out", "tv::Tensor"),
        ("stream", "std::uintptr_t", "0"),
    )
    for spec in signature:
        launcher_code.arg(*spec)
    launcher_code.raw(f"""
auto nhot = out.dim(0);
tv::check_shape(inds, {{-1, nhot}});
tv::check_shape(in, {{-1, out.dim(1)}});
auto cudastream = reinterpret_cast<cudaStream_t>(stream);
tv::dispatch<float, double, tv::half_t, tv::bfloat16_t>(out.dtype(), [&](auto I){{
using T = TV_DECLTYPE(I);
constexpr int MaxThreads = 512;
tv::cuda::Launch launcher(1);
bool found = tv::dispatch_int_noexcept<512, 256, 128, 64, 32, 16>(out.dim(1), [](int my, int expect){{return my >= expect;}}, [&](auto V){{
// if out.dim(1) > value in list above, run this function.
// if a value is found, other value won't be executed.
int NumFeatures = TV_DECLTYPE(V)::value;
int Num0 = MaxThreads / NumFeatures;
dim3 blocks(tv::div_up(out.dim(1), int64_t(NumFeatures)), tv::div_up(nhot, int64_t(Num0)));
dim3 threads(NumFeatures, Num0);
launcher = tv::cuda::Launch(blocks, threads, cudastream);
}});
if (!found){{
int NumFeatures = 16;
int Num0 = MaxThreads / NumFeatures;
dim3 blocks(tv::div_up(out.dim(1), int64_t(NumFeatures)), tv::div_up(nhot, int64_t(Num0)));
dim3 threads(NumFeatures, Num0);
launcher = tv::cuda::Launch(blocks, threads, cudastream);
}}
launcher(forward_avgpool_implicit_gemm_kernel<T>, out.data_ptr<T>(), in.data_ptr<const T>(),
inds.data_ptr<const int>(), count_out.data_ptr<int>(), out.dim(1), inds.dim(0), inds.dim(1));
}});
""")
    return launcher_code
@pccm.cuda.static_function
def backward_avgpool_implicit_gemm(self):
    """Build the host-side launcher for the average-pool backward kernel.

    Generated function arguments: ``dout``, ``din``, ``inds`` and
    ``count_out`` (all ``tv::Tensor``) plus ``stream`` (``std::uintptr_t``,
    default ``0``), which is reinterpreted as a ``cudaStream_t``.

    The generated code:

    * raises a runtime error when ``count_out`` is empty;
    * checks that ``inds`` has shape ``(-1, din.dim(0))`` and ``din`` has
      shape ``(-1, dout.dim(1))``;
    * dispatches on ``dout.dtype()`` over float, double, half and bfloat16;
    * picks the block x-size from ``{{512, 256, 128, 64, 32, 16}}`` using the
      predicate ``dout.dim(1) >= candidate`` (falling back to 16 when none
      matches) and sets the block y-size to ``512 / x``;
    * launches ``backward_avgpool_implicit_gemm_kernel<T>`` with
      ``num_features = dout.dim(1)``, ``RS = inds.dim(0)`` and
      ``num_indices = inds.dim(1)``.

    Returns:
        The populated ``pccm.FunctionCode`` describing the launcher.
    """
    code = pccm.FunctionCode()
    code.arg("dout", "tv::Tensor")
    code.arg("din", "tv::Tensor")
    code.arg("inds", "tv::Tensor")
    code.arg("count_out", "tv::Tensor")
    code.arg("stream", "std::uintptr_t", "0")
    # The unused local `num_act_out` (which actually held the channel count,
    # dout.dim(1)) was dropped from the generated code: it was never read.
    code.raw(f"""
auto nhot = din.dim(0);
TV_ASSERT_RT_ERR(!count_out.empty(), "count out must not empty")
tv::check_shape(inds, {{-1, nhot}});
tv::check_shape(din, {{-1, dout.dim(1)}});
auto cudastream = reinterpret_cast<cudaStream_t>(stream);
tv::dispatch<float, double, tv::half_t, tv::bfloat16_t>(dout.dtype(), [&](auto I){{
using T = TV_DECLTYPE(I);
constexpr int MaxThreads = 512;
tv::cuda::Launch launcher(1);
bool found = tv::dispatch_int_noexcept<512, 256, 128, 64, 32, 16>(dout.dim(1), [](int my, int expect){{return my >= expect;}}, [&](auto V){{
// if out.dim(1) > value in list above, run this function.
// if a value is found, other value won't be executed.
int NumFeatures = TV_DECLTYPE(V)::value;
int Num0 = MaxThreads / NumFeatures;
dim3 blocks(tv::div_up(dout.dim(1), int64_t(NumFeatures)), tv::div_up(nhot, int64_t(Num0)));
dim3 threads(NumFeatures, Num0);
launcher = tv::cuda::Launch(blocks, threads, cudastream);
}});
if (!found){{
int NumFeatures = 16;
int Num0 = MaxThreads / NumFeatures;
dim3 blocks(tv::div_up(dout.dim(1), int64_t(NumFeatures)), tv::div_up(nhot, int64_t(Num0)));
dim3 threads(NumFeatures, Num0);
launcher = tv::cuda::Launch(blocks, threads, cudastream);
}}
launcher(backward_avgpool_implicit_gemm_kernel<T>,
dout.data_ptr<const T>(), din.data_ptr<T>(),
inds.data_ptr<const int>(), count_out.data_ptr<const int>(),
dout.dim(1), inds.dim(0), inds.dim(1));
}});
""")
    return code
class
IndiceMaxPoolCPU
(
pccm
.
Class
):
class
IndiceMaxPoolCPU
(
pccm
.
Class
):
def
__init__
(
self
):
def
__init__
(
self
):
...
...
spconv/csrc/sparse/pointops.py
View file @
21bb00ae
...
@@ -297,7 +297,7 @@ class Point2Voxel(pccm.ParameterizedClass, pccm.pybind.PybindClassMixin):
...
@@ -297,7 +297,7 @@ class Point2Voxel(pccm.ParameterizedClass, pccm.pybind.PybindClassMixin):
self
.
add_dependency
(
TensorView
)
self
.
add_dependency
(
TensorView
)
self
.
p2v_c
=
Point2VoxelCommon
(
dtype
,
ndim
,
zyx
)
self
.
p2v_c
=
Point2VoxelCommon
(
dtype
,
ndim
,
zyx
)
self
.
add_param_class
(
"p2v_c"
,
self
.
p2v_c
,
"Point2VoxelCommon"
)
self
.
add_param_class
(
"p2v_c"
,
self
.
p2v_c
,
"Point2VoxelCommon"
)
layout
=
TensorGeneric
(
ndim
,
Tru
e
)
layout
=
TensorGeneric
(
ndim
,
Fals
e
)
self
.
add_param_class
(
"layout_ns"
,
layout
,
"Layout"
)
self
.
add_param_class
(
"layout_ns"
,
layout
,
"Layout"
)
self
.
dtype
=
dtype
self
.
dtype
=
dtype
self
.
ndim
=
ndim
self
.
ndim
=
ndim
...
@@ -489,7 +489,7 @@ class Point2VoxelCPU(pccm.ParameterizedClass, pccm.pybind.PybindClassMixin):
...
@@ -489,7 +489,7 @@ class Point2VoxelCPU(pccm.ParameterizedClass, pccm.pybind.PybindClassMixin):
def
__init__
(
self
,
dtype
:
dtypes
.
DType
,
ndim
:
int
,
zyx
:
bool
=
True
):
def
__init__
(
self
,
dtype
:
dtypes
.
DType
,
ndim
:
int
,
zyx
:
bool
=
True
):
super
().
__init__
()
super
().
__init__
()
self
.
add_dependency
(
TensorView
)
self
.
add_dependency
(
TensorView
)
layout
=
TensorGeneric
(
ndim
,
Tru
e
)
layout
=
TensorGeneric
(
ndim
,
Fals
e
)
self
.
add_param_class
(
"layout_ns"
,
layout
,
"Layout"
)
self
.
add_param_class
(
"layout_ns"
,
layout
,
"Layout"
)
self
.
dtype
=
dtype
self
.
dtype
=
dtype
self
.
ndim
=
ndim
self
.
ndim
=
ndim
...
...
spconv/gencode/__init__.py
View file @
21bb00ae
# Copyright 2022 Yan Yan
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
spconv/gencode/__main__.py
View file @
21bb00ae
...
@@ -10,33 +10,41 @@ from spconv.core import (IMPLGEMM_SIMT_PARAMS, IMPLGEMM_TURING_PARAMS,
...
@@ -10,33 +10,41 @@ from spconv.core import (IMPLGEMM_SIMT_PARAMS, IMPLGEMM_TURING_PARAMS,
SHUFFLE_TURING_PARAMS
,
SHUFFLE_VOLTA_PARAMS
)
SHUFFLE_TURING_PARAMS
,
SHUFFLE_VOLTA_PARAMS
)
from
spconv.csrc.hash.core
import
HashTable
from
spconv.csrc.hash.core
import
HashTable
from
spconv.csrc.sparse.all
import
SpconvOps
from
spconv.csrc.sparse.all
import
SpconvOps
from
spconv.csrc.sparse.alloc
import
ExternalAllocator
from
spconv.csrc.sparse.alloc
import
ExternalAllocator
,
StaticAllocator
from
spconv.csrc.sparse.convops
import
(
ConvGemmOps
,
ConvTunerSimple
,
from
spconv.csrc.sparse.convops
import
(
ConvGemmOps
,
ConvTunerSimple
,
ExternalSpconvMatmul
,
GemmTunerSimple
,
ExternalSpconvMatmul
,
GemmTunerSimple
,
SimpleExternalSpconvMatmul
)
SimpleExternalSpconvMatmul
)
from
spconv.csrc.utils
import
BoxOps
from
spconv.csrc.utils
import
BoxOps
from
cumm.gemm.algospec.core
import
(
GemmAlgo
,
ShuffleStrideType
)
from
cumm.conv.bases
import
ConvLayout
,
ConvLayoutType
,
ConvOpType
def
main
(
include
:
str
,
def
main
(
include
:
str
,
src
:
str
,
src
:
str
,
libname
:
str
=
"spconv"
,
libname
:
str
=
"spconv"
,
prefix
:
str
=
"spconvlib"
):
prefix
:
str
=
"spconvlib"
,
inference_only
:
bool
=
False
):
all_shuffle
=
SHUFFLE_SIMT_PARAMS
+
SHUFFLE_VOLTA_PARAMS
+
SHUFFLE_TURING_PARAMS
all_shuffle
=
SHUFFLE_SIMT_PARAMS
+
SHUFFLE_VOLTA_PARAMS
+
SHUFFLE_TURING_PARAMS
all_shuffle
=
list
(
filter
(
lambda
x
:
not
x
.
is_nvrtc
,
all_shuffle
))
all_shuffle
=
list
(
filter
(
lambda
x
:
not
x
.
is_nvrtc
,
all_shuffle
))
if
inference_only
:
all_shuffle
=
list
(
filter
(
lambda
x
:
x
.
shuffle_stride
!=
ShuffleStrideType
.
ShuffleAB
,
all_shuffle
))
cu
=
GemmMainUnitTest
(
all_shuffle
)
cu
=
GemmMainUnitTest
(
all_shuffle
)
cu
.
namespace
=
"cumm.gemm.main"
cu
.
namespace
=
"cumm.gemm.main"
all_imp
=
(
IMPLGEMM_SIMT_PARAMS
+
IMPLGEMM_VOLTA_PARAMS
+
all_imp
=
(
IMPLGEMM_SIMT_PARAMS
+
IMPLGEMM_VOLTA_PARAMS
+
IMPLGEMM_TURING_PARAMS
)
IMPLGEMM_TURING_PARAMS
)
# all_imp = IMPLGEMM_SIMT_PARAMS
# all_imp = IMPLGEMM_SIMT_PARAMS
all_imp
=
list
(
filter
(
lambda
x
:
not
x
.
is_nvrtc
,
all_imp
))
all_imp
=
list
(
filter
(
lambda
x
:
not
x
.
is_nvrtc
,
all_imp
))
if
inference_only
:
all_imp
=
list
(
filter
(
lambda
x
:
x
.
op_type
==
ConvOpType
.
kForward
,
all_imp
))
convcu
=
ConvMainUnitTest
(
all_imp
)
convcu
=
ConvMainUnitTest
(
all_imp
)
convcu
.
namespace
=
"cumm.conv.main"
convcu
.
namespace
=
"cumm.conv.main"
gemmtuner
=
GemmTunerSimple
(
cu
)
gemmtuner
=
GemmTunerSimple
(
cu
)
gemmtuner
.
namespace
=
"csrc.sparse.convops.gemmops"
gemmtuner
.
namespace
=
"
spconv.
csrc.sparse.convops.gemmops"
convtuner
=
ConvTunerSimple
(
convcu
)
convtuner
=
ConvTunerSimple
(
convcu
)
convtuner
.
namespace
=
"csrc.sparse.convops.convops"
convtuner
.
namespace
=
"
spconv.
csrc.sparse.convops.convops"
convops
=
ConvGemmOps
(
gemmtuner
,
convtuner
)
convops
=
ConvGemmOps
(
gemmtuner
,
convtuner
)
convops
.
namespace
=
"csrc.sparse.convops.spops"
convops
.
namespace
=
"
spconv.
csrc.sparse.convops.spops"
cus
=
[
cus
=
[
cu
,
cu
,
...
@@ -51,6 +59,7 @@ def main(include: str,
...
@@ -51,6 +59,7 @@ def main(include: str,
ExternalAllocator
(),
ExternalAllocator
(),
ExternalSpconvMatmul
(),
ExternalSpconvMatmul
(),
SimpleExternalSpconvMatmul
(),
SimpleExternalSpconvMatmul
(),
StaticAllocator
(),
]
]
gen_cmake
(
libname
,
cus
,
include
,
src
,
namespace_prefix
=
prefix
)
gen_cmake
(
libname
,
cus
,
include
,
src
,
namespace_prefix
=
prefix
)
...
...
spconv/pytorch/__init__.py
View file @
21bb00ae
...
@@ -17,7 +17,9 @@ from spconv.pytorch.modules import (SparseModule, SparseSequential,
...
@@ -17,7 +17,9 @@ from spconv.pytorch.modules import (SparseModule, SparseSequential,
assign_name_for_sparse_modules
)
assign_name_for_sparse_modules
)
from
spconv.pytorch.ops
import
ConvAlgo
from
spconv.pytorch.ops
import
ConvAlgo
from
spconv.pytorch.pool
import
(
SparseMaxPool1d
,
SparseMaxPool2d
,
from
spconv.pytorch.pool
import
(
SparseMaxPool1d
,
SparseMaxPool2d
,
SparseMaxPool3d
,
SparseMaxPool4d
)
SparseMaxPool3d
,
SparseMaxPool4d
,
SparseAvgPool1d
,
SparseAvgPool2d
,
SparseAvgPool3d
)
from
spconv.pytorch.tables
import
AddTable
,
ConcatTable
,
JoinTable
from
spconv.pytorch.tables
import
AddTable
,
ConcatTable
,
JoinTable
...
...
spconv/pytorch/conv.py
View file @
21bb00ae
This diff is collapsed.
Click to expand it.
spconv/pytorch/core.py
View file @
21bb00ae
...
@@ -12,13 +12,14 @@
...
@@ -12,13 +12,14 @@
# See the License for the specific language governing permissions and
# See the License for the specific language governing permissions and
# limitations under the License.
# limitations under the License.
from
typing
import
List
,
Optional
,
Tuple
,
Union
,
Dict
from
typing
import
Any
,
List
,
Optional
,
Tuple
,
Union
,
Dict
import
numpy
as
np
import
numpy
as
np
import
torch
import
torch
from
spconv.core
import
ConvAlgo
from
spconv.core
import
ConvAlgo
from
spconv.pytorch.constants
import
PYTORCH_VERSION
from
spconv.pytorch.constants
import
PYTORCH_VERSION
from
spconv.tools
import
CUDAKernelTimer
from
spconv.tools
import
CUDAKernelTimer
from
spconv.constants
import
SPCONV_FX_TRACE_MODE
if
PYTORCH_VERSION
>=
[
1
,
8
,
0
]:
if
PYTORCH_VERSION
>=
[
1
,
8
,
0
]:
try
:
try
:
...
@@ -59,7 +60,8 @@ class ThrustSortAllocator:
...
@@ -59,7 +60,8 @@ class ThrustSortAllocator:
class
IndiceData
(
object
):
class
IndiceData
(
object
):
def
__init__
(
self
,
out_indices
,
indices
,
indice_pairs
,
indice_pair_num
,
def
__init__
(
self
,
out_indices
,
indices
,
indice_pairs
,
indice_pair_num
,
spatial_shape
,
out_spatial_shape
,
is_subm
:
bool
,
algo
:
ConvAlgo
,
spatial_shape
,
out_spatial_shape
,
is_subm
:
bool
,
algo
:
ConvAlgo
,
ksize
:
List
[
int
],
stride
:
List
[
int
],
dilation
:
List
[
int
],
padding
:
List
[
int
]):
ksize
:
List
[
int
],
stride
:
List
[
int
],
dilation
:
List
[
int
],
padding
:
List
[
int
],
voxel_num
:
Optional
[
Any
]
=
None
):
self
.
out_indices
=
out_indices
self
.
out_indices
=
out_indices
self
.
indices
=
indices
self
.
indices
=
indices
self
.
indice_pairs
=
indice_pairs
self
.
indice_pairs
=
indice_pairs
...
@@ -72,6 +74,8 @@ class IndiceData(object):
...
@@ -72,6 +74,8 @@ class IndiceData(object):
self
.
stride
=
stride
self
.
stride
=
stride
self
.
dilation
=
dilation
self
.
dilation
=
dilation
self
.
padding
=
padding
self
.
padding
=
padding
# voxel_num is only used in tensorrt conversion.
self
.
voxel_num
=
voxel_num
class
ImplicitGemmIndiceData
(
object
):
class
ImplicitGemmIndiceData
(
object
):
...
@@ -83,7 +87,9 @@ class ImplicitGemmIndiceData(object):
...
@@ -83,7 +87,9 @@ class ImplicitGemmIndiceData(object):
mask_argsort_bwd_splits
:
List
[
torch
.
Tensor
],
mask_argsort_bwd_splits
:
List
[
torch
.
Tensor
],
masks
:
List
[
np
.
ndarray
],
spatial_shape
,
masks
:
List
[
np
.
ndarray
],
spatial_shape
,
out_spatial_shape
,
is_subm
:
bool
,
algo
:
ConvAlgo
,
out_spatial_shape
,
is_subm
:
bool
,
algo
:
ConvAlgo
,
ksize
:
List
[
int
],
stride
:
List
[
int
],
dilation
:
List
[
int
],
padding
:
List
[
int
]):
ksize
:
List
[
int
],
stride
:
List
[
int
],
dilation
:
List
[
int
],
padding
:
List
[
int
],
in_voxel_num
:
Optional
[
Any
]
=
None
,
out_voxel_num
:
Optional
[
Any
]
=
None
):
self
.
out_indices
=
out_indices
self
.
out_indices
=
out_indices
self
.
indices
=
indices
self
.
indices
=
indices
self
.
pair_fwd
=
pair_fwd
self
.
pair_fwd
=
pair_fwd
...
@@ -101,6 +107,9 @@ class ImplicitGemmIndiceData(object):
...
@@ -101,6 +107,9 @@ class ImplicitGemmIndiceData(object):
self
.
stride
=
stride
self
.
stride
=
stride
self
.
dilation
=
dilation
self
.
dilation
=
dilation
self
.
padding
=
padding
self
.
padding
=
padding
# in/out voxel_num is only used in tensorrt conversion.
self
.
in_voxel_num
=
in_voxel_num
self
.
out_voxel_num
=
out_voxel_num
def
scatter_nd
(
indices
,
updates
,
shape
):
def
scatter_nd
(
indices
,
updates
,
shape
):
...
@@ -147,11 +156,12 @@ class SparseConvTensor(metaclass=SpConvTensorMeta):
...
@@ -147,11 +156,12 @@ class SparseConvTensor(metaclass=SpConvTensorMeta):
force_algo: force conv/pool layers use this algo, should only used for debug.
force_algo: force conv/pool layers use this algo, should only used for debug.
"""
"""
ndim
=
indices
.
shape
[
1
]
-
1
ndim
=
indices
.
shape
[
1
]
-
1
assert
features
.
ndim
==
2
if
not
SPCONV_FX_TRACE_MODE
:
assert
indices
.
ndim
==
2
assert
features
.
ndim
==
2
assert
len
(
spatial_shape
)
==
ndim
,
"spatial shape must equal to ndim"
assert
indices
.
ndim
==
2
assert
indices
.
dtype
==
torch
.
int32
,
"only support int32"
assert
len
(
spatial_shape
)
==
ndim
,
"spatial shape must equal to ndim"
assert
batch_size
>
0
assert
indices
.
dtype
==
torch
.
int32
,
"only support int32"
assert
batch_size
>
0
self
.
_features
=
features
self
.
_features
=
features
self
.
indices
=
indices
self
.
indices
=
indices
self
.
spatial_shape
=
[
int
(
v
)
for
v
in
spatial_shape
]
self
.
spatial_shape
=
[
int
(
v
)
for
v
in
spatial_shape
]
...
...
spconv/pytorch/cppcore.py
View file @
21bb00ae
...
@@ -103,7 +103,7 @@ class TorchAllocator(ExternalAllocator):
...
@@ -103,7 +103,7 @@ class TorchAllocator(ExternalAllocator):
self
.
allocated
:
Dict
[
Union
[
str
,
int
],
torch
.
Tensor
]
=
{}
self
.
allocated
:
Dict
[
Union
[
str
,
int
],
torch
.
Tensor
]
=
{}
def
zeros
(
self
,
name
:
str
,
shape
:
List
[
int
],
dtype
:
int
,
def
zeros
(
self
,
name
:
str
,
shape
:
List
[
int
],
dtype
:
int
,
device
:
int
,
is_temp_memory
:
bool
=
False
,
stream
:
int
=
0
)
->
tv
.
Tensor
:
device
:
int
,
stream
:
int
=
0
,
is_temp_memory
:
bool
=
False
)
->
tv
.
Tensor
:
# TODO free memory by name if its already free by pointer.
# TODO free memory by name if its already free by pointer.
# provide a name if you want to access it after c++ function exit.
# provide a name if you want to access it after c++ function exit.
torch_uint_workaround
=
dtype
in
_TORCH_UINT_WORKAROUNDS
torch_uint_workaround
=
dtype
in
_TORCH_UINT_WORKAROUNDS
...
@@ -126,7 +126,7 @@ class TorchAllocator(ExternalAllocator):
...
@@ -126,7 +126,7 @@ class TorchAllocator(ExternalAllocator):
return
ten_tv
return
ten_tv
def
empty
(
self
,
name
:
str
,
shape
:
List
[
int
],
dtype
:
int
,
def
empty
(
self
,
name
:
str
,
shape
:
List
[
int
],
dtype
:
int
,
device
:
int
,
is_temp_memory
:
bool
=
False
,
stream
:
int
=
0
)
->
tv
.
Tensor
:
device
:
int
,
stream
:
int
=
0
,
is_temp_memory
:
bool
=
False
)
->
tv
.
Tensor
:
torch_uint_workaround
=
dtype
in
_TORCH_UINT_WORKAROUNDS
torch_uint_workaround
=
dtype
in
_TORCH_UINT_WORKAROUNDS
dtype_bkp
=
dtype
dtype_bkp
=
dtype
if
dtype
in
_TORCH_UINT_WORKAROUNDS
:
if
dtype
in
_TORCH_UINT_WORKAROUNDS
:
...
@@ -147,7 +147,7 @@ class TorchAllocator(ExternalAllocator):
...
@@ -147,7 +147,7 @@ class TorchAllocator(ExternalAllocator):
return
ten_tv
return
ten_tv
def
full_int
(
self
,
name
:
str
,
shape
:
List
[
int
],
value
:
int
,
dtype
:
int
,
def
full_int
(
self
,
name
:
str
,
shape
:
List
[
int
],
value
:
int
,
dtype
:
int
,
device
:
int
,
is_temp_memory
:
bool
=
False
,
stream
:
int
=
0
)
->
tv
.
Tensor
:
device
:
int
,
stream
:
int
=
0
,
is_temp_memory
:
bool
=
False
)
->
tv
.
Tensor
:
if
dtype
in
_TORCH_UINT_WORKAROUNDS
and
value
<
0
:
if
dtype
in
_TORCH_UINT_WORKAROUNDS
and
value
<
0
:
raise
NotImplementedError
(
"you can't use full for unsigned dtypes"
)
raise
NotImplementedError
(
"you can't use full for unsigned dtypes"
)
torch_uint_workaround
=
dtype
in
_TORCH_UINT_WORKAROUNDS
torch_uint_workaround
=
dtype
in
_TORCH_UINT_WORKAROUNDS
...
@@ -171,7 +171,7 @@ class TorchAllocator(ExternalAllocator):
...
@@ -171,7 +171,7 @@ class TorchAllocator(ExternalAllocator):
return
ten_tv
return
ten_tv
def
full_float
(
self
,
name
:
str
,
shape
:
List
[
int
],
value
:
float
,
dtype
:
int
,
def
full_float
(
self
,
name
:
str
,
shape
:
List
[
int
],
value
:
float
,
dtype
:
int
,
device
:
int
,
is_temp_memory
:
bool
=
False
,
stream
:
int
=
0
)
->
tv
.
Tensor
:
device
:
int
,
stream
:
int
=
0
,
is_temp_memory
:
bool
=
False
)
->
tv
.
Tensor
:
if
dtype
in
_TORCH_UINT_WORKAROUNDS
and
value
<
0
:
if
dtype
in
_TORCH_UINT_WORKAROUNDS
and
value
<
0
:
raise
NotImplementedError
(
"you can't use full for unsigned dtypes"
)
raise
NotImplementedError
(
"you can't use full for unsigned dtypes"
)
torch_uint_workaround
=
dtype
in
_TORCH_UINT_WORKAROUNDS
torch_uint_workaround
=
dtype
in
_TORCH_UINT_WORKAROUNDS
...
...
spconv/pytorch/functional.py
View file @
21bb00ae
...
@@ -361,6 +361,25 @@ class SparseMaxPoolImplicitGemmFunction(Function):
...
@@ -361,6 +361,25 @@ class SparseMaxPoolImplicitGemmFunction(Function):
features
,
out
,
grad_output
,
indice_pairs_bwd
)
features
,
out
,
grad_output
,
indice_pairs_bwd
)
return
input_bp
,
None
,
None
,
None
return
input_bp
,
None
,
None
,
None
class SparseAvgPoolImplicitGemmFunction(Function):
    """Autograd wrapper around the implicit-gemm sparse average-pool ops.

    ``forward`` delegates to ``ops.indice_avgpool_implicit_gemm`` and
    ``backward`` to ``ops.indice_avgpool_implicit_gemm_backward``. Only the
    first input (``features``) receives a gradient.
    """

    @staticmethod
    @_TORCH_CUSTOM_FWD
    def forward(ctx, features: torch.Tensor, indice_pairs_fwd: torch.Tensor,
                indice_pairs_bwd: torch.Tensor, num_activate_out: int,
                calc_count):
        """Run the forward average pool and stash what backward needs.

        Args:
            ctx: autograd context.
            features: input features to pool.
            indice_pairs_fwd: forward index pairs, passed through to the op.
            indice_pairs_bwd: backward index pairs, kept for ``backward``.
            num_activate_out: passed through to the op (presumably the
                number of active output locations -- confirm against ops).
            calc_count: passed through to the op unchanged.

        Returns:
            The pooled output features produced by the op.
        """
        out, count = ops.indice_avgpool_implicit_gemm(features,
                                                      indice_pairs_fwd,
                                                      num_activate_out,
                                                      calc_count)
        # Backward only reads the backward index pairs and the counts, so
        # don't pin `features` and `out` in memory until backward runs.
        ctx.save_for_backward(indice_pairs_bwd, count)
        return out

    @staticmethod
    @once_differentiable
    @_TORCH_CUSTOM_BWD
    def backward(ctx, grad_output):
        """Compute the gradient with respect to ``features``.

        Returns:
            A 5-tuple matching ``forward``'s inputs: the input gradient
            followed by ``None`` for the four non-differentiable arguments.
        """
        indice_pairs_bwd, count = ctx.saved_tensors
        input_bp = ops.indice_avgpool_implicit_gemm_backward(
            grad_output, indice_pairs_bwd, count)
        return input_bp, None, None, None, None
indice_conv
=
SparseConvFunction
.
apply
indice_conv
=
SparseConvFunction
.
apply
implicit_gemm
=
SparseImplicitGemmFunction
.
apply
implicit_gemm
=
SparseImplicitGemmFunction
.
apply
...
@@ -368,6 +387,7 @@ indice_inverse_conv = SparseInverseConvFunction.apply
...
@@ -368,6 +387,7 @@ indice_inverse_conv = SparseInverseConvFunction.apply
indice_subm_conv
=
SubMConvFunction
.
apply
indice_subm_conv
=
SubMConvFunction
.
apply
indice_maxpool
=
SparseMaxPoolFunction
.
apply
indice_maxpool
=
SparseMaxPoolFunction
.
apply
indice_maxpool_implicit_gemm
=
SparseMaxPoolImplicitGemmFunction
.
apply
indice_maxpool_implicit_gemm
=
SparseMaxPoolImplicitGemmFunction
.
apply
indice_avgpool_implicit_gemm
=
SparseAvgPoolImplicitGemmFunction
.
apply
def
_indice_to_scalar
(
indices
:
torch
.
Tensor
,
shape
:
List
[
int
]):
def
_indice_to_scalar
(
indices
:
torch
.
Tensor
,
shape
:
List
[
int
]):
...
...
spconv/pytorch/modules.py
View file @
21bb00ae
...
@@ -132,12 +132,11 @@ class SparseSequential(SparseModule):
...
@@ -132,12 +132,11 @@ class SparseSequential(SparseModule):
if
isinstance
(
input
,
list
):
if
isinstance
(
input
,
list
):
input
=
module
(
input
)
input
=
module
(
input
)
else
:
else
:
assert
isinstance
(
input
,
spconv
.
SparseConvTensor
)
#
assert isinstance(input, spconv.SparseConvTensor)
# self._sparity_dict[k] = input.sparity
# self._sparity_dict[k] = input.sparity
input
=
module
(
input
)
input
=
module
(
input
)
else
:
else
:
if
isinstance
(
input
,
spconv
.
SparseConvTensor
):
if
isinstance
(
input
,
spconv
.
SparseConvTensor
):
print
(
input
.
features
.
shape
)
if
input
.
indices
.
shape
[
0
]
!=
0
:
if
input
.
indices
.
shape
[
0
]
!=
0
:
input
=
input
.
replace_feature
(
module
(
input
.
features
))
input
=
input
.
replace_feature
(
module
(
input
.
features
))
else
:
else
:
...
...
Prev
1
2
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment