Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
one
spconv
Commits
62c1496f
Commit
62c1496f
authored
Oct 19, 2021
by
yan.yan
Browse files
bug fix
parent
01ed382c
Changes
18
Hide whitespace changes
Inline
Side-by-side
Showing
18 changed files
with
508 additions
and
354 deletions
+508
-354
.github/workflows/build.yaml
.github/workflows/build.yaml
+96
-0
README.md
README.md
+0
-1
pyproject.toml
pyproject.toml
+1
-1
setup.py
setup.py
+2
-3
spconv/build.py
spconv/build.py
+0
-1
spconv/core_cc/csrc/sparse/all/__init__.pyi
spconv/core_cc/csrc/sparse/all/__init__.pyi
+8
-15
spconv/csrc/sparse/all.py
spconv/csrc/sparse/all.py
+21
-38
spconv/csrc/sparse/indices.py
spconv/csrc/sparse/indices.py
+53
-80
spconv/csrc/sparse/maxpool.py
spconv/csrc/sparse/maxpool.py
+2
-1
spconv/pytorch/conv.py
spconv/pytorch/conv.py
+80
-20
spconv/pytorch/modules.py
spconv/pytorch/modules.py
+0
-47
spconv/pytorch/ops.py
spconv/pytorch/ops.py
+82
-81
spconv/pytorch/pool.py
spconv/pytorch/pool.py
+28
-0
test/aaa.py
test/aaa.py
+94
-45
test/benchmark.py
test/benchmark.py
+6
-5
test/data/test_spconv.pkl
test/data/test_spconv.pkl
+0
-0
test/test_conv.py
test/test_conv.py
+33
-14
tools/build-wheels.sh
tools/build-wheels.sh
+2
-2
No files found.
.github/workflows/build.yaml
0 → 100644
View file @
62c1496f
name
:
build
on
:
push
:
branches
:
-
main
-
feature/*
tags
:
-
'
*'
jobs
:
build-windows
:
runs-on
:
windows-latest
strategy
:
matrix
:
python-version
:
[
'
3.6'
,
'
3.7'
,
'
3.8'
,
'
3.9'
,
'
3.10'
]
cuda-version
:
[
'
10.2'
,
'
11.1'
,
'
11.4'
]
steps
:
-
uses
:
actions/checkout@master
-
name
:
Install CUDA
env
:
CUDA_VERSION
:
${{ matrix.cuda-version }}
PYTHON_VERSION
:
${{ matrix.python-version }}
cuda
:
${{ matrix.cuda-version }}
if
:
(github.event_name == 'push' && (startsWith(github.ref, 'refs/tags')) && (env.CUDA_VERSION != '') ) || (env.CUDA_VERSION == '11.1' && env.PYTHON_VERSION == '3.10')
shell
:
powershell
run
:
.\tools\install_windows_cuda.ps1
-
name
:
Set up Python ${{ matrix.python-version }}
uses
:
actions/setup-python@v2
with
:
python-version
:
${{ matrix.python-version }}
-
uses
:
ilammy/msvc-dev-cmd@v1
-
name
:
Install pep build
run
:
|
python -m pip install build --user
python -m pip install --upgrade pip twine wheel
python -m pip install pytest setuptools
-
name
:
Build a windows binary wheel
env
:
CUDA_VERSION
:
${{ matrix.cuda-version }}
PYTHON_VERSION
:
${{ matrix.python-version }}
if
:
(github.event_name == 'push' && (startsWith(github.ref, 'refs/tags')) ) || (env.CUDA_VERSION == '11.1' && env.PYTHON_VERSION == '3.10')
run
:
|
$Env:CUMM_CUDA_VERSION = "${{ matrix.cuda-version }}"
$Env:CUMM_CUDA_ARCH_LIST = "all"
$Env:SPCONV_DISABLE_JIT = "1"
pip install pccm pybind11
python -m build --wheel --outdir dist/ .
shell
:
powershell
-
name
:
Publish a Python distribution to PyPI
if
:
github.event_name == 'push' && startsWith(github.ref, 'refs/tags')
run
:
|
$Env:TWINE_USERNAME = "__token__"
$Env:TWINE_PASSWORD = "${{ secrets.pypi_password }}"
twine upload dist/*
shell
:
powershell
build
:
needs
:
build-windows
runs-on
:
ubuntu-20.04
strategy
:
matrix
:
python-version
:
[
'
3.8'
]
# this version is only used for upload.
cuda-version
:
[
'
102'
,
'
111'
,
'
114'
]
steps
:
-
uses
:
actions/checkout@master
-
name
:
Set up Python ${{ matrix.python-version }}
uses
:
actions/setup-python@v2
with
:
python-version
:
${{ matrix.python-version }}
-
name
:
Install pep build
run
:
|
python -m pip install build --user
python -m pip install --upgrade pip twine wheel
python -m pip install pytest setuptools
-
name
:
Build a cuda wheel
env
:
CUDA_VERSION
:
${{ matrix.cuda-version }}
PYTHON_VERSION
:
${{ matrix.python-version }}
DOCKER_IMAGE
:
scrin/manylinux2014-cuda:cu${{ matrix.cuda-version }}-devel
PLAT
:
manylinux2014_x86_64
if
:
(github.event_name == 'push' && (startsWith(github.ref, 'refs/tags')) && (env.CUDA_VERSION != '') ) || env.CUDA_VERSION == '114'
run
:
|
docker run --rm -e PLAT=$PLAT -e CUMM_CUDA_VERSION=${{ matrix.cuda-version }} -v `pwd`:/io $DOCKER_IMAGE bash -c "/io/tools/build-wheels.sh"
-
name
:
Publish a Python distribution to PyPI
if
:
github.event_name == 'push' && startsWith(github.ref, 'refs/tags')
uses
:
pypa/gh-action-pypi-publish@master
with
:
user
:
__token__
password
:
${{ secrets.pypi_password }}
\ No newline at end of file
README.md
View file @
62c1496f
...
@@ -34,7 +34,6 @@
...
@@ -34,7 +34,6 @@
*
training/inference speed is increased
*
training/inference speed is increased
*
support int8/tensor core
*
support int8/tensor core
*
doesn't depend on pytorch binary.
*
doesn't depend on pytorch binary.
*
If your GPU has tensor core, try mixed precision training in spconv 2.x!
*
since spconv 2.x doesn't depend on pytorch binary (never in future), it's impossible to support torch.jit/libtorch inference.
*
since spconv 2.x doesn't depend on pytorch binary (never in future), it's impossible to support torch.jit/libtorch inference.
## TODO in Spconv 2.x
## TODO in Spconv 2.x
...
...
pyproject.toml
View file @
62c1496f
[build-system]
[build-system]
requires
=
[
"setuptools>=41.0"
,
"wheel"
,
"pccm>=0.2.5"
,
"cumm>=0.1.
3
"
]
requires
=
[
"setuptools>=41.0"
,
"wheel"
,
"pccm>=0.2.5"
,
"cumm>=0.1.
5
"
]
build-backend
=
"setuptools.build_meta"
build-backend
=
"setuptools.build_meta"
setup.py
View file @
62c1496f
...
@@ -30,7 +30,7 @@ DESCRIPTION = 'spatial sparse convolution'
...
@@ -30,7 +30,7 @@ DESCRIPTION = 'spatial sparse convolution'
URL
=
'https://github.com/traveller59/spconv'
URL
=
'https://github.com/traveller59/spconv'
EMAIL
=
'yanyan.sub@outlook.com'
EMAIL
=
'yanyan.sub@outlook.com'
AUTHOR
=
'Yan Yan'
AUTHOR
=
'Yan Yan'
REQUIRES_PYTHON
=
'>=3.
7
'
REQUIRES_PYTHON
=
'>=3.
6
'
VERSION
=
None
VERSION
=
None
# What packages are required for this module to be executed?
# What packages are required for this module to be executed?
...
@@ -134,12 +134,11 @@ if disable_jit is not None and disable_jit == "1":
...
@@ -134,12 +134,11 @@ if disable_jit is not None and disable_jit == "1":
}
}
from
cumm.gemm.main
import
GemmMainUnitTest
,
SHUFFLE_SIMT_PARAMS
,
SHUFFLE_VOLTA_PARAMS
,
SHUFFLE_TURING_PARAMS
from
cumm.gemm.main
import
GemmMainUnitTest
,
SHUFFLE_SIMT_PARAMS
,
SHUFFLE_VOLTA_PARAMS
,
SHUFFLE_TURING_PARAMS
from
spconv.csrc.sparse.all
import
SpconvOps
from
spconv.csrc.sparse.all
import
SpconvOps
from
cumm.gemm.gather
import
GatherAll
cu
=
GemmMainUnitTest
(
SHUFFLE_SIMT_PARAMS
+
SHUFFLE_VOLTA_PARAMS
+
SHUFFLE_TURING_PARAMS
)
cu
=
GemmMainUnitTest
(
SHUFFLE_SIMT_PARAMS
+
SHUFFLE_VOLTA_PARAMS
+
SHUFFLE_TURING_PARAMS
)
cu
.
namespace
=
"cumm.gemm.main"
cu
.
namespace
=
"cumm.gemm.main"
ext_modules
:
List
[
Extension
]
=
[
ext_modules
:
List
[
Extension
]
=
[
PCCMExtension
([
cu
,
SpconvOps
()
,
GatherAll
()
],
PCCMExtension
([
cu
,
SpconvOps
()],
"spconv/core_cc"
,
"spconv/core_cc"
,
Path
(
__file__
).
resolve
().
parent
/
"spconv"
)
Path
(
__file__
).
resolve
().
parent
/
"spconv"
)
]
]
...
...
spconv/build.py
View file @
62c1496f
...
@@ -22,7 +22,6 @@ from .constants import PACKAGE_NAME, PACKAGE_ROOT
...
@@ -22,7 +22,6 @@ from .constants import PACKAGE_NAME, PACKAGE_ROOT
if
project_is_installed
(
PACKAGE_NAME
)
and
project_is_editable
(
PACKAGE_NAME
):
if
project_is_installed
(
PACKAGE_NAME
)
and
project_is_editable
(
PACKAGE_NAME
):
from
cumm.gemm.main
import
GemmMainUnitTest
,
SHUFFLE_SIMT_PARAMS
,
SHUFFLE_VOLTA_PARAMS
,
SHUFFLE_TURING_PARAMS
from
cumm.gemm.main
import
GemmMainUnitTest
,
SHUFFLE_SIMT_PARAMS
,
SHUFFLE_VOLTA_PARAMS
,
SHUFFLE_TURING_PARAMS
from
spconv.csrc.sparse.all
import
SpconvOps
from
spconv.csrc.sparse.all
import
SpconvOps
# from cumm.gemm.gather import GatherAll, ScatterAll
cu
=
GemmMainUnitTest
(
SHUFFLE_SIMT_PARAMS
+
SHUFFLE_VOLTA_PARAMS
+
SHUFFLE_TURING_PARAMS
)
cu
=
GemmMainUnitTest
(
SHUFFLE_SIMT_PARAMS
+
SHUFFLE_VOLTA_PARAMS
+
SHUFFLE_TURING_PARAMS
)
cu
.
namespace
=
"cumm.gemm.main"
cu
.
namespace
=
"cumm.gemm.main"
pccm
.
builder
.
build_pybind
([
cu
,
SpconvOps
()],
pccm
.
builder
.
build_pybind
([
cu
,
SpconvOps
()],
...
...
spconv/core_cc/csrc/sparse/all/__init__.pyi
View file @
62c1496f
...
@@ -3,14 +3,12 @@ from pccm.stubs import EnumValue, EnumClassValue
...
@@ -3,14 +3,12 @@ from pccm.stubs import EnumValue, EnumClassValue
from cumm.tensorview import Tensor
from cumm.tensorview import Tensor
class SpconvOps:
class SpconvOps:
@staticmethod
@staticmethod
def generate_conv_inds(indices: Tensor,
hashdata: Tensor,
indice_pairs: Tensor, indice_pairs_uniq: Tensor,
out_inds: Tensor,
indice_num_per_loc: Tensor, batch_size: int, output_dims: List[int], input_dims: List[int], ksize: List[int], stride: List[int], padding: List[int], dilation: List[int]) ->
int
:
def generate_conv_inds
_stage1
(indices: Tensor, indice_pairs: Tensor, indice_pairs_uniq: Tensor, indice_num_per_loc: Tensor, batch_size: int, output_dims: List[int], input_dims: List[int], ksize: List[int], stride: List[int], padding: List[int], dilation: List[int]
, transposed: bool = False, stream_int: int = 0
) ->
None
:
"""
"""
Args:
Args:
indices:
indices:
hashdata:
indice_pairs:
indice_pairs:
indice_pairs_uniq:
indice_pairs_uniq:
out_inds:
indice_num_per_loc:
indice_num_per_loc:
batch_size:
batch_size:
output_dims:
output_dims:
...
@@ -19,28 +17,22 @@ class SpconvOps:
...
@@ -19,28 +17,22 @@ class SpconvOps:
stride:
stride:
padding:
padding:
dilation:
dilation:
transposed:
stream_int:
"""
"""
...
...
@staticmethod
@staticmethod
def generate_conv_inds_stage1(
indices: Tensor, indice_pairs: Tensor,
indice_pairs_uniq: Tensor,
i
ndi
ce_num_per_loc: Tensor, batch_size: int, output_dims: List[int], input_dims: List[int], ksize: List[int], stride: List[int], padding: List[int], dilation: List[int]
, stream_int: int = 0) -> int:
def generate_conv_inds_stage1
_5
(indice_pairs_uniq: Tensor, ndi
m: int, uniq_size: int
, stream_int: int = 0) -> int:
"""
"""
Args:
Args:
indices:
indice_pairs:
indice_pairs_uniq:
indice_pairs_uniq:
indice_num_per_loc:
ndim:
batch_size:
uniq_size:
output_dims:
input_dims:
ksize:
stride:
padding:
dilation:
stream_int:
stream_int:
"""
"""
...
...
@staticmethod
@staticmethod
def generate_conv_inds_stage2(indices: Tensor, hashdata: Tensor, indice_pairs: Tensor, indice_pairs_uniq: Tensor, out_inds: Tensor, num_out_act: int, batch_size: int, output_dims: List[int], input_dims: List[int], ksize: List[int], stride: List[int], padding: List[int], dilation: List[int], stream_int: int = 0) -> int:
def generate_conv_inds_stage2(indices: Tensor, hashdata: Tensor, indice_pairs: Tensor, indice_pairs_uniq: Tensor, out_inds: Tensor, num_out_act: int, batch_size: int, output_dims: List[int], input_dims: List[int], ksize: List[int], stride: List[int], padding: List[int], dilation: List[int],
transposed: bool = False,
stream_int: int = 0) -> int:
"""
"""
Args:
Args:
indices:
indices:
...
@@ -56,6 +48,7 @@ class SpconvOps:
...
@@ -56,6 +48,7 @@ class SpconvOps:
stride:
stride:
padding:
padding:
dilation:
dilation:
transposed:
stream_int:
stream_int:
"""
"""
...
...
...
...
spconv/csrc/sparse/all.py
View file @
62c1496f
...
@@ -35,26 +35,30 @@ class SpconvOps(pccm.Class):
...
@@ -35,26 +35,30 @@ class SpconvOps(pccm.Class):
problem
=
ConvProblem
(
ndim
,
ConvOpType
.
kForward
,
NHWC
,
NHWC
,
NHWC
)
problem
=
ConvProblem
(
ndim
,
ConvOpType
.
kForward
,
NHWC
,
NHWC
,
NHWC
)
indices
=
SparseConvIndicesKernel
(
problem
,
dtypes
.
int32
)
indices
=
SparseConvIndicesKernel
(
problem
,
dtypes
.
int32
)
# self.add_param_class("ops", indices, "SpconvIndices")
# self.add_param_class("ops", indices, "SpconvIndices")
cuda_funcs
=
[
self
.
generate_conv_inds
,
self
.
generate_subm_conv_inds
,
cuda_funcs
=
[
self
.
generate_subm_conv_inds
,
self
.
generate_conv_inds_stage1
,
self
.
generate_conv_inds_stage2
,
self
.
sort_1d_by_key
]
self
.
generate_conv_inds_stage1
,
self
.
generate_conv_inds_stage1_5
,
self
.
generate_conv_inds_stage2
,
self
.
sort_1d_by_key
]
self
.
add_impl_only_param_class
(
cuda_funcs
,
f
"ops
{
ndim
}
d"
,
indices
,
f
"SpconvIndices
{
ndim
}
D"
)
self
.
add_impl_only_param_class
(
cuda_funcs
,
f
"ops
{
ndim
}
d"
,
indices
,
f
"SpconvIndices
{
ndim
}
D"
)
@
pccm
.
pybind
.
mark
@
pccm
.
pybind
.
mark
@
pccm
.
cuda
.
static_function
@
pccm
.
cuda
.
static_function
def
generate_conv_inds
(
self
):
def
generate_conv_inds
_stage1
(
self
):
code
=
pccm
.
FunctionCode
()
code
=
pccm
.
FunctionCode
()
code
.
arg
(
"indices
, hashdata
"
,
"tv::Tensor"
)
code
.
arg
(
"indices"
,
"tv::Tensor"
)
code
.
arg
(
"indice_pairs, indice_pairs_uniq,
out_inds,
indice_num_per_loc"
,
"tv::Tensor"
)
code
.
arg
(
"indice_pairs, indice_pairs_uniq, indice_num_per_loc"
,
"tv::Tensor"
)
code
.
arg
(
"batch_size"
,
"int"
)
code
.
arg
(
"batch_size"
,
"int"
)
code
.
arg
(
"output_dims, input_dims"
,
f
"std::vector<int>"
)
code
.
arg
(
"output_dims, input_dims"
,
f
"std::vector<int>"
)
code
.
arg
(
"ksize, stride, padding, dilation"
,
f
"std::vector<int>"
)
code
.
arg
(
"ksize, stride, padding, dilation"
,
f
"std::vector<int>"
)
code
.
arg
(
"transposed"
,
f
"bool"
,
"false"
)
code
.
arg
(
"stream_int"
,
f
"std::uintptr_t"
,
"0"
,
pyanno
=
"int"
)
code
.
raw
(
f
"""
code
.
raw
(
f
"""
int ndim = indices.dim(1) - 1;
int ndim = indices.dim(1) - 1;
TV_ASSERT_RT_ERR(output_dims.size() == ndim && input_dims.size() == ndim &&
TV_ASSERT_RT_ERR(output_dims.size() == ndim && input_dims.size() == ndim &&
ksize.size() == ndim && stride.size() == ndim && dilation.size() == ndim &&
ksize.size() == ndim && stride.size() == ndim && dilation.size() == ndim &&
padding.size() == ndim, "your params size not equal to ndim", ndim);
padding.size() == ndim, "your params size not equal to ndim", ndim);
"""
)
"""
)
for
ndim
in
self
.
ndims
:
for
ndim
in
self
.
ndims
:
code
.
raw
(
f
"""
code
.
raw
(
f
"""
if (ndim ==
{
ndim
}
){{
if (ndim ==
{
ndim
}
){{
...
@@ -68,53 +72,31 @@ class SpconvOps(pccm.Class):
...
@@ -68,53 +72,31 @@ class SpconvOps(pccm.Class):
padding_[i] = padding[i];
padding_[i] = padding[i];
dilation_[i] = dilation[i];
dilation_[i] = dilation[i];
}}
}}
return SpconvIndices
{
ndim
}
D::generate_conv_inds(indices,
hashdata,
return SpconvIndices
{
ndim
}
D::generate_conv_inds
_stage1
(indices,
indice_pairs, indice_pairs_uniq,
out_inds,
indice_num_per_loc,
indice_pairs, indice_pairs_uniq, indice_num_per_loc,
batch_size, output_dims_, input_dims_,
batch_size, output_dims_, input_dims_,
ksize_, stride_, padding_, dilation_);
ksize_, stride_, padding_, dilation_
, transposed, stream_int
);
}}
}}
"""
)
"""
)
code
.
raw
(
f
"""TV_THROW_RT_ERR("unknown ndim", ndim);"""
)
code
.
raw
(
f
"""TV_THROW_RT_ERR("unknown ndim", ndim);"""
)
return
code
.
ret
(
"int"
)
return
code
# .ret("int")
@
pccm
.
pybind
.
mark
@
pccm
.
pybind
.
mark
@
pccm
.
cuda
.
static_function
@
pccm
.
cuda
.
static_function
def
generate_conv_inds_stage1
(
self
):
def
generate_conv_inds_stage1
_5
(
self
):
code
=
pccm
.
FunctionCode
()
code
=
pccm
.
FunctionCode
()
code
.
arg
(
"indices"
,
"tv::Tensor"
)
code
.
arg
(
"indice_pairs_uniq"
,
"tv::Tensor"
)
code
.
arg
(
"indice_pairs, indice_pairs_uniq, indice_num_per_loc"
,
"tv::Tensor"
)
code
.
arg
(
"ndim"
,
"int"
)
code
.
arg
(
"batch_size"
,
"int"
)
code
.
arg
(
"uniq_size"
,
"int64_t"
)
code
.
arg
(
"output_dims, input_dims"
,
f
"std::vector<int>"
)
code
.
arg
(
"ksize, stride, padding, dilation"
,
f
"std::vector<int>"
)
code
.
arg
(
"stream_int"
,
f
"std::uintptr_t"
,
"0"
,
pyanno
=
"int"
)
code
.
arg
(
"stream_int"
,
f
"std::uintptr_t"
,
"0"
,
pyanno
=
"int"
)
code
.
raw
(
f
"""
int ndim = indices.dim(1) - 1;
TV_ASSERT_RT_ERR(output_dims.size() == ndim && input_dims.size() == ndim &&
ksize.size() == ndim && stride.size() == ndim && dilation.size() == ndim &&
padding.size() == ndim, "your params size not equal to ndim", ndim);
"""
)
for
ndim
in
self
.
ndims
:
for
ndim
in
self
.
ndims
:
code
.
raw
(
f
"""
code
.
raw
(
f
"""
if (ndim ==
{
ndim
}
){{
if (ndim ==
{
ndim
}
){{
tv::array<int,
{
ndim
}
> output_dims_, input_dims_;
return SpconvIndices
{
ndim
}
D::generate_conv_inds_stage1_5(indice_pairs_uniq, uniq_size, stream_int);
tv::array<int,
{
ndim
}
> ksize_, stride_, padding_, dilation_;
for (int i = 0; i <
{
ndim
}
; ++i){{
output_dims_[i] = output_dims[i];
input_dims_[i] = input_dims[i];
ksize_[i] = ksize[i];
stride_[i] = stride[i];
padding_[i] = padding[i];
dilation_[i] = dilation[i];
}}
return SpconvIndices
{
ndim
}
D::generate_conv_inds_stage1(indices,
indice_pairs, indice_pairs_uniq, indice_num_per_loc,
batch_size, output_dims_, input_dims_,
ksize_, stride_, padding_, dilation_);
}}
}}
"""
)
"""
)
code
.
raw
(
f
"""TV_THROW_RT_ERR("unknown ndim", ndim);"""
)
code
.
raw
(
f
"""TV_THROW_RT_ERR("unknown ndim", ndim);"""
)
return
code
.
ret
(
"int"
)
return
code
.
ret
(
"int"
)
@
pccm
.
pybind
.
mark
@
pccm
.
pybind
.
mark
...
@@ -127,6 +109,7 @@ class SpconvOps(pccm.Class):
...
@@ -127,6 +109,7 @@ class SpconvOps(pccm.Class):
code
.
arg
(
"batch_size"
,
"int"
)
code
.
arg
(
"batch_size"
,
"int"
)
code
.
arg
(
"output_dims, input_dims"
,
f
"std::vector<int>"
)
code
.
arg
(
"output_dims, input_dims"
,
f
"std::vector<int>"
)
code
.
arg
(
"ksize, stride, padding, dilation"
,
f
"std::vector<int>"
)
code
.
arg
(
"ksize, stride, padding, dilation"
,
f
"std::vector<int>"
)
code
.
arg
(
"transposed"
,
f
"bool"
,
"false"
)
code
.
arg
(
"stream_int"
,
f
"std::uintptr_t"
,
"0"
,
pyanno
=
"int"
)
code
.
arg
(
"stream_int"
,
f
"std::uintptr_t"
,
"0"
,
pyanno
=
"int"
)
code
.
raw
(
f
"""
code
.
raw
(
f
"""
int ndim = indices.dim(1) - 1;
int ndim = indices.dim(1) - 1;
...
@@ -151,7 +134,7 @@ class SpconvOps(pccm.Class):
...
@@ -151,7 +134,7 @@ class SpconvOps(pccm.Class):
return SpconvIndices
{
ndim
}
D::generate_conv_inds_stage2(indices, hashdata,
return SpconvIndices
{
ndim
}
D::generate_conv_inds_stage2(indices, hashdata,
indice_pairs, indice_pairs_uniq, out_inds, num_out_act,
indice_pairs, indice_pairs_uniq, out_inds, num_out_act,
batch_size, output_dims_, input_dims_,
batch_size, output_dims_, input_dims_,
ksize_, stride_, padding_, dilation_);
ksize_, stride_, padding_, dilation_
, transposed, stream_int
);
}}
}}
"""
)
"""
)
code
.
raw
(
f
"""TV_THROW_RT_ERR("unknown ndim", ndim);"""
)
code
.
raw
(
f
"""TV_THROW_RT_ERR("unknown ndim", ndim);"""
)
...
...
spconv/csrc/sparse/indices.py
View file @
62c1496f
...
@@ -85,7 +85,7 @@ class ConvOutLocIter(pccm.ParameterizedClass):
...
@@ -85,7 +85,7 @@ class ConvOutLocIter(pccm.ParameterizedClass):
code
.
ctor_init
(
"layout_npq"
,
f
"LayoutNPQ::from_shape({{problem.N,
{
pqs
}
}})"
)
code
.
ctor_init
(
"layout_npq"
,
f
"LayoutNPQ::from_shape({{problem.N,
{
pqs
}
}})"
)
code
.
ctor_init
(
"layout_rs"
,
f
"LayoutRS::from_shape({{
{
rss
}
}})"
)
code
.
ctor_init
(
"layout_rs"
,
f
"LayoutRS::from_shape({{
{
rss
}
}})"
)
return
code
return
code
@
pccm
.
cuda
.
member_function
(
host
=
True
,
@
pccm
.
cuda
.
member_function
(
host
=
True
,
...
@@ -225,6 +225,28 @@ class ConvOutLocIter(pccm.ParameterizedClass):
...
@@ -225,6 +225,28 @@ class ConvOutLocIter(pccm.ParameterizedClass):
"""
)
"""
)
return
code
return
code
@
pccm
.
cuda
.
member_function
(
host
=
True
,
device
=
True
,
forceinline
=
True
,
const
=
True
)
def
query_nhw_out
(
self
):
code
=
pccm
.
FunctionCode
()
code
.
arg
(
"npq_offset"
,
"const int*"
)
code
.
arg
(
"nhw_offset"
,
f
"tv::array<int,
{
self
.
ndim
+
1
}
>&"
)
code
.
ret
(
"bool"
)
code
.
raw
(
f
"""
nhw_offset = npq_to_nhw(npq_offset);
"""
)
hw_valid
=
[]
# type: List[str]
for
i
in
range
(
self
.
ndim
):
hw_valid
.
append
((
f
"nhw_offset[
{
i
+
1
}
] >= 0 && "
f
"nhw_offset[
{
i
+
1
}
] < problem_.output_dims[
{
i
}
]"
))
code
.
raw
(
f
"""
return nhw_offset[0] < problem_.N &&
{
' && '
.
join
(
hw_valid
)
}
;
"""
)
return
code
class
SparseConvIndicesKernel
(
pccm
.
ParameterizedClass
):
class
SparseConvIndicesKernel
(
pccm
.
ParameterizedClass
):
def
__init__
(
self
,
problem
:
ConvProblem
,
dtype_indices
:
dtypes
.
DType
):
def
__init__
(
self
,
problem
:
ConvProblem
,
dtype_indices
:
dtypes
.
DType
):
super
().
__init__
()
super
().
__init__
()
...
@@ -255,7 +277,7 @@ class SparseConvIndicesKernel(pccm.ParameterizedClass):
...
@@ -255,7 +277,7 @@ class SparseConvIndicesKernel(pccm.ParameterizedClass):
code
.
arg
(
"indices_pair_size"
,
"int"
)
code
.
arg
(
"indices_pair_size"
,
"int"
)
code
.
arg
(
"RS"
,
"int"
)
code
.
arg
(
"RS"
,
"int"
)
#
code.arg(
"bool",
"transposed")
code
.
arg
(
"transposed"
,
"bool"
)
code
.
raw
(
f
"""
code
.
raw
(
f
"""
int filter_offset = blockIdx.y;
int filter_offset = blockIdx.y;
...
@@ -264,7 +286,13 @@ class SparseConvIndicesKernel(pccm.ParameterizedClass):
...
@@ -264,7 +286,13 @@ class SparseConvIndicesKernel(pccm.ParameterizedClass):
int filter_offset_mul_indices_pair_size = filter_offset * indices_pair_size;
int filter_offset_mul_indices_pair_size = filter_offset * indices_pair_size;
for (int i : tv::KernelLoopX<int>(num_indices_in)) {{
for (int i : tv::KernelLoopX<int>(num_indices_in)) {{
tv::array<int,
{
self
.
ndim
+
1
}
> npq_offset;
tv::array<int,
{
self
.
ndim
+
1
}
> npq_offset;
if (loc_iter.query_npq(indices_in + i *
{
self
.
ndim
+
1
}
, npq_offset)){{
bool valid;
if (transposed){{
valid = loc_iter.query_nhw_out(indices_in + i *
{
self
.
ndim
+
1
}
, npq_offset);
}}else{{
valid = loc_iter.query_npq(indices_in + i *
{
self
.
ndim
+
1
}
, npq_offset);
}}
if (valid){{
int old_num = tv::cuda::atomicAggInc(indice_num_per_loc + filter_offset);
int old_num = tv::cuda::atomicAggInc(indice_num_per_loc + filter_offset);
{
self
.
dtype_indices
}
offset = loc_iter.layout_npq(npq_offset);
{
self
.
dtype_indices
}
offset = loc_iter.layout_npq(npq_offset);
if (old_num < indices_pair_size){{
if (old_num < indices_pair_size){{
...
@@ -514,81 +542,6 @@ class SparseConvIndicesKernel(pccm.ParameterizedClass):
...
@@ -514,81 +542,6 @@ class SparseConvIndicesKernel(pccm.ParameterizedClass):
"""
)
"""
)
return
code
return
code
@
pccm
.
cuda
.
static_function
def
generate_conv_inds
(
self
):
code
=
pccm
.
FunctionCode
()
code
.
arg
(
"indices, hashdata"
,
"tv::Tensor"
)
code
.
arg
(
"indice_pairs, indice_pairs_uniq, out_inds, indice_num_per_loc"
,
"tv::Tensor"
)
code
.
arg
(
"batch_size"
,
"int"
)
code
.
arg
(
"output_dims, input_dims"
,
f
"tv::array<int,
{
self
.
ndim
}
>"
)
code
.
arg
(
"ksize, stride, padding, dilation"
,
f
"tv::array<int,
{
self
.
ndim
}
>"
)
code
.
raw
(
f
"""
// TODO stream
// TODO handle num input == 0
int kv = tv::arrayops::prod(ksize);
TV_ASSERT_RT_ERR(kv == indice_pairs.dim(1), "error");
// indice_pairs: [2, kv, indices.dim(0)]
// indice_pairs_uniq: [indice_pairs.size() / 2 + 1]
// out_inds: [MaxSize,
{
self
.
ndim
+
1
}
]
auto timer = tv::CudaContextTimer<>();
int64_t uniq_size = indice_pairs.size() / 2 + 1;
TV_ASSERT_RT_ERR(indice_pairs_uniq.dim(0) == uniq_size, "error");
TV_ASSERT_RT_ERR(indice_num_per_loc.dim(0) == kv, "error");
int64_t expected_out_size = indices.dim(0) * kv;
TV_ASSERT_RT_ERR(out_inds.dim(0) == expected_out_size && out_inds.dim(1) ==
{
self
.
ndim
+
1
}
, "error");
tv::cuda::Launch launcher_num_act_in(indices.dim(0));
// tv::cuda::Launch launcher_num_act_in_2(indices.dim(0));
launcher_num_act_in.blocks.y = kv;
ConvProblem problem(batch_size, 1, 1, input_dims, output_dims, ksize, padding, stride, dilation);
ConvLocIter loc_iter(problem);
tv::cuda::Launch launcher_clean_uniq(uniq_size);
launcher_clean_uniq(clean_indices_uniq, indice_pairs_uniq.data_ptr<
{
self
.
dtype_indices
}
>(), uniq_size);
tv::ssprint("clean time", timer.report() / 1000.0);
launcher_num_act_in(calc_conv_indices_stage1, loc_iter, indices.data_ptr<const int>(),
indice_pairs.data_ptr<
{
self
.
dtype_indices
}
>(),
indice_pairs_uniq.data_ptr<
{
self
.
dtype_indices
}
>(), indice_num_per_loc.data_ptr<int>(), indices.dim(0),
indice_pairs.dim(2), kv);
tv::ssprint("calc_conv_indices_stage1 time", timer.report() / 1000.0, uniq_size);
thrust::device_ptr<
{
self
.
dtype_indices
}
> ptr_tr(indice_pairs_uniq.data_ptr<
{
self
.
dtype_indices
}
>());
auto thrust_ctx = thrust::cuda::par.on(0);
thrust::sort(thrust_ctx, ptr_tr, ptr_tr + uniq_size);
auto new_end = thrust::unique(thrust_ctx, ptr_tr, ptr_tr + uniq_size);
auto num_out_act = new_end - ptr_tr - 1;
tv::ssprint("unique time", num_out_act, timer.report() / 1000.0);
// return num_out_act;
// TODO handle invalid num_out_act
indice_pairs_uniq = indice_pairs_uniq.slice_first_axis(0, num_out_act);
tv::cuda::Launch lanucher_build_hash(num_out_act);
using V =
{
self
.
dtype_indices
}
;
using KeyType =
{
self
.
dtype_indices
}
;
constexpr KeyType kEmptyKey = std::numeric_limits<KeyType>::max();
using table_t =
tv::hash::LinearHashTable<KeyType, V, tv::hash::Murmur3Hash<KeyType>,
kEmptyKey, false>;
using pair_t = typename table_t::value_type;
TV_ASSERT_RT_ERR(hashdata.dim(0) >= num_out_act, "hash size not enough");
table_t hash = table_t(hashdata.data_ptr<pair_t>(), hashdata.dim(0));
hash.clear();
tv::ssprint("clear hash time", hashdata.dim(0), timer.report() / 1000.0);
lanucher_build_hash(build_conv_hash_table<table_t>, hash, out_inds.data_ptr<int>(), indice_pairs_uniq.data_ptr<const
{
self
.
dtype_indices
}
>(),
loc_iter.layout_npq, num_out_act);
tv::ssprint("build_hash time", num_out_act, timer.report() / 1000.0);
launcher_num_act_in(calc_conv_indices_stage2<table_t>, hash, indice_pairs[1].data_ptr<int>(), indices.dim(0),
indice_pairs.dim(2));
tv::ssprint("gem conv inds time", timer.report() / 1000.0);
return num_out_act;
"""
)
return
code
.
ret
(
"int"
)
@
pccm
.
cuda
.
static_function
@
pccm
.
cuda
.
static_function
def
generate_conv_inds_stage1
(
self
):
def
generate_conv_inds_stage1
(
self
):
code
=
pccm
.
FunctionCode
()
code
=
pccm
.
FunctionCode
()
...
@@ -597,6 +550,8 @@ class SparseConvIndicesKernel(pccm.ParameterizedClass):
...
@@ -597,6 +550,8 @@ class SparseConvIndicesKernel(pccm.ParameterizedClass):
code
.
arg
(
"batch_size"
,
"int"
)
code
.
arg
(
"batch_size"
,
"int"
)
code
.
arg
(
"output_dims, input_dims"
,
f
"tv::array<int,
{
self
.
ndim
}
>"
)
code
.
arg
(
"output_dims, input_dims"
,
f
"tv::array<int,
{
self
.
ndim
}
>"
)
code
.
arg
(
"ksize, stride, padding, dilation"
,
f
"tv::array<int,
{
self
.
ndim
}
>"
)
code
.
arg
(
"ksize, stride, padding, dilation"
,
f
"tv::array<int,
{
self
.
ndim
}
>"
)
code
.
arg
(
"transposed"
,
f
"bool"
,
"false"
)
code
.
arg
(
"stream_int"
,
f
"std::uintptr_t"
,
"0"
)
code
.
arg
(
"stream_int"
,
f
"std::uintptr_t"
,
"0"
)
code
.
raw
(
f
"""
code
.
raw
(
f
"""
...
@@ -620,7 +575,23 @@ class SparseConvIndicesKernel(pccm.ParameterizedClass):
...
@@ -620,7 +575,23 @@ class SparseConvIndicesKernel(pccm.ParameterizedClass):
launcher_num_act_in(calc_conv_indices_stage1, loc_iter, indices.data_ptr<const int>(),
launcher_num_act_in(calc_conv_indices_stage1, loc_iter, indices.data_ptr<const int>(),
indice_pairs.data_ptr<
{
self
.
dtype_indices
}
>(),
indice_pairs.data_ptr<
{
self
.
dtype_indices
}
>(),
indice_pairs_uniq.data_ptr<
{
self
.
dtype_indices
}
>(), indice_num_per_loc.data_ptr<int>(), indices.dim(0),
indice_pairs_uniq.data_ptr<
{
self
.
dtype_indices
}
>(), indice_num_per_loc.data_ptr<int>(), indices.dim(0),
indice_pairs.dim(2), kv);
indice_pairs.dim(2), kv, transposed);
// thrust::device_ptr<
{
self
.
dtype_indices
}
> ptr_tr(indice_pairs_uniq.data_ptr<
{
self
.
dtype_indices
}
>());
// auto thrust_ctx = thrust::cuda::par.on(reinterpret_cast<cudaStream_t>(stream_int));
// thrust::sort(thrust_ctx, ptr_tr, ptr_tr + uniq_size);
// auto new_end = thrust::unique(thrust_ctx, ptr_tr, ptr_tr + uniq_size);
// auto num_out_act = new_end - ptr_tr - 1;
// return num_out_act;
"""
)
return
code
# .ret("int")
@
pccm
.
cuda
.
static_function
def
generate_conv_inds_stage1_5
(
self
):
code
=
pccm
.
FunctionCode
()
code
.
arg
(
"indice_pairs_uniq"
,
"tv::Tensor"
)
code
.
arg
(
"uniq_size"
,
"int64_t"
)
code
.
arg
(
"stream_int"
,
f
"std::uintptr_t"
,
"0"
)
code
.
raw
(
f
"""
thrust::device_ptr<
{
self
.
dtype_indices
}
> ptr_tr(indice_pairs_uniq.data_ptr<
{
self
.
dtype_indices
}
>());
thrust::device_ptr<
{
self
.
dtype_indices
}
> ptr_tr(indice_pairs_uniq.data_ptr<
{
self
.
dtype_indices
}
>());
auto thrust_ctx = thrust::cuda::par.on(reinterpret_cast<cudaStream_t>(stream_int));
auto thrust_ctx = thrust::cuda::par.on(reinterpret_cast<cudaStream_t>(stream_int));
thrust::sort(thrust_ctx, ptr_tr, ptr_tr + uniq_size);
thrust::sort(thrust_ctx, ptr_tr, ptr_tr + uniq_size);
...
@@ -630,6 +601,7 @@ class SparseConvIndicesKernel(pccm.ParameterizedClass):
...
@@ -630,6 +601,7 @@ class SparseConvIndicesKernel(pccm.ParameterizedClass):
"""
)
"""
)
return
code
.
ret
(
"int"
)
return
code
.
ret
(
"int"
)
@
pccm
.
cuda
.
static_function
@
pccm
.
cuda
.
static_function
def
generate_conv_inds_stage2
(
self
):
def
generate_conv_inds_stage2
(
self
):
code
=
pccm
.
FunctionCode
()
code
=
pccm
.
FunctionCode
()
...
@@ -639,6 +611,7 @@ class SparseConvIndicesKernel(pccm.ParameterizedClass):
...
@@ -639,6 +611,7 @@ class SparseConvIndicesKernel(pccm.ParameterizedClass):
code
.
arg
(
"batch_size"
,
"int"
)
code
.
arg
(
"batch_size"
,
"int"
)
code
.
arg
(
"output_dims, input_dims"
,
f
"tv::array<int,
{
self
.
ndim
}
>"
)
code
.
arg
(
"output_dims, input_dims"
,
f
"tv::array<int,
{
self
.
ndim
}
>"
)
code
.
arg
(
"ksize, stride, padding, dilation"
,
f
"tv::array<int,
{
self
.
ndim
}
>"
)
code
.
arg
(
"ksize, stride, padding, dilation"
,
f
"tv::array<int,
{
self
.
ndim
}
>"
)
code
.
arg
(
"transposed"
,
f
"bool"
,
"false"
)
code
.
arg
(
"stream_int"
,
f
"std::uintptr_t"
,
"0"
)
code
.
arg
(
"stream_int"
,
f
"std::uintptr_t"
,
"0"
)
code
.
raw
(
f
"""
code
.
raw
(
f
"""
auto custream = reinterpret_cast<cudaStream_t>(stream_int);
auto custream = reinterpret_cast<cudaStream_t>(stream_int);
...
@@ -651,7 +624,7 @@ class SparseConvIndicesKernel(pccm.ParameterizedClass):
...
@@ -651,7 +624,7 @@ class SparseConvIndicesKernel(pccm.ParameterizedClass):
// out_inds: [MaxSize,
{
self
.
ndim
+
1
}
]
// out_inds: [MaxSize,
{
self
.
ndim
+
1
}
]
auto timer = tv::CudaContextTimer<>();
auto timer = tv::CudaContextTimer<>();
int64_t uniq_size = indice_pairs.size() / 2 + 1;
int64_t uniq_size = indice_pairs.size() / 2 + 1;
TV_ASSERT_RT_ERR(indice_pairs_uniq.dim(0)
=
=
uniq_size
, "error");
TV_ASSERT_RT_ERR(indice_pairs_uniq.dim(0)
>
=
num_out_act
, "error");
TV_ASSERT_RT_ERR(out_inds.dim(0) >= num_out_act && out_inds.dim(1) ==
{
self
.
ndim
+
1
}
, "error");
TV_ASSERT_RT_ERR(out_inds.dim(0) >= num_out_act && out_inds.dim(1) ==
{
self
.
ndim
+
1
}
, "error");
tv::cuda::Launch launcher_num_act_in(indices.dim(0), custream);
tv::cuda::Launch launcher_num_act_in(indices.dim(0), custream);
launcher_num_act_in.blocks.y = kv;
launcher_num_act_in.blocks.y = kv;
...
...
spconv/csrc/sparse/maxpool.py
View file @
62c1496f
...
@@ -22,7 +22,9 @@ from cumm.common import TensorView, TensorViewHashKernel, TensorViewKernel, Thru
...
@@ -22,7 +22,9 @@ from cumm.common import TensorView, TensorViewHashKernel, TensorViewKernel, Thru
from
cumm.gemm
import
codeops
from
cumm.gemm
import
codeops
from
typing
import
List
from
typing
import
List
from
cumm.conv.params
import
ConvProblem
from
cumm.conv.params
import
ConvProblem
from
cumm.gemm.mask_iters
import
MaskTileIterator
,
MaskTileIteratorParams
import
numpy
as
np
import
numpy
as
np
from
cumm.gemm
import
(
thread_map
)
class
IndiceMaxPool
(
pccm
.
Class
):
class
IndiceMaxPool
(
pccm
.
Class
):
# TODO optimize this function
# TODO optimize this function
...
@@ -171,4 +173,3 @@ class IndiceMaxPool(pccm.Class):
...
@@ -171,4 +173,3 @@ class IndiceMaxPool(pccm.Class):
}});
}});
"""
)
"""
)
return
code
return
code
spconv/pytorch/conv.py
View file @
62c1496f
...
@@ -14,6 +14,7 @@
...
@@ -14,6 +14,7 @@
import
math
import
math
import
time
import
time
from
typing
import
List
,
Optional
,
Tuple
,
Union
import
numpy
as
np
import
numpy
as
np
import
torch
import
torch
...
@@ -40,8 +41,12 @@ def _calculate_fan_in_and_fan_out_hwio(tensor):
...
@@ -40,8 +41,12 @@ def _calculate_fan_in_and_fan_out_hwio(tensor):
fan_in
=
tensor
.
size
(
-
2
)
fan_in
=
tensor
.
size
(
-
2
)
fan_out
=
tensor
.
size
(
-
1
)
fan_out
=
tensor
.
size
(
-
1
)
else
:
else
:
num_input_fmaps
=
tensor
.
size
(
-
1
)
if
FILTER_HWIO
:
num_output_fmaps
=
tensor
.
size
(
-
2
)
num_input_fmaps
=
tensor
.
size
(
-
2
)
num_output_fmaps
=
tensor
.
size
(
-
1
)
else
:
num_input_fmaps
=
tensor
.
size
(
-
1
)
num_output_fmaps
=
tensor
.
size
(
-
2
)
receptive_field_size
=
1
receptive_field_size
=
1
if
tensor
.
dim
()
>
2
:
if
tensor
.
dim
()
>
2
:
receptive_field_size
=
tensor
[...,
0
,
0
].
numel
()
receptive_field_size
=
tensor
[...,
0
,
0
].
numel
()
...
@@ -58,22 +63,22 @@ class SparseConvolution(SparseModule):
...
@@ -58,22 +63,22 @@ class SparseConvolution(SparseModule):
]
]
def
__init__
(
self
,
def
__init__
(
self
,
ndim
,
ndim
:
int
,
in_channels
,
in_channels
:
int
,
out_channels
,
out_channels
:
int
,
kernel_size
=
3
,
kernel_size
:
Union
[
int
,
List
[
int
],
Tuple
[
int
,
...]]
=
3
,
stride
=
1
,
stride
:
Union
[
int
,
List
[
int
],
Tuple
[
int
,
...]]
=
1
,
padding
=
0
,
padding
:
Union
[
int
,
List
[
int
],
Tuple
[
int
,
...]]
=
0
,
dilation
=
1
,
dilation
:
Union
[
int
,
List
[
int
],
Tuple
[
int
,
...]]
=
1
,
groups
=
1
,
groups
:
Union
[
int
,
List
[
int
],
Tuple
[
int
,
...]]
=
1
,
bias
=
True
,
bias
:
bool
=
True
,
subm
=
False
,
subm
:
bool
=
False
,
output_padding
=
0
,
output_padding
:
Union
[
int
,
List
[
int
],
Tuple
[
int
,
...]]
=
0
,
transposed
=
False
,
transposed
:
bool
=
False
,
inverse
=
False
,
inverse
:
bool
=
False
,
indice_key
=
None
,
indice_key
:
Optional
[
str
]
=
None
,
fused_bn
=
False
,
fused_bn
:
bool
=
False
,
algo
=
ops
.
ConvAlgo
.
Native
,
algo
:
ops
.
ConvAlgo
=
ops
.
ConvAlgo
.
Native
,
name
=
None
):
name
=
None
):
super
(
SparseConvolution
,
self
).
__init__
(
name
=
name
)
super
(
SparseConvolution
,
self
).
__init__
(
name
=
name
)
assert
groups
==
1
assert
groups
==
1
...
@@ -117,8 +122,6 @@ class SparseConvolution(SparseModule):
...
@@ -117,8 +122,6 @@ class SparseConvolution(SparseModule):
self
.
bias
=
Parameter
(
torch
.
Tensor
(
out_channels
))
self
.
bias
=
Parameter
(
torch
.
Tensor
(
out_channels
))
else
:
else
:
self
.
register_parameter
(
'bias'
,
None
)
self
.
register_parameter
(
'bias'
,
None
)
# self.workspace_for_splitk = torch.zeros((GLOBAL_MAXIMUM_SPLITK,), dtype=torch.int8)
# self.register_buffer("workspace_for_splitk", self.workspace_for_splitk)
self
.
reset_parameters
()
self
.
reset_parameters
()
def
reset_parameters
(
self
):
def
reset_parameters
(
self
):
...
@@ -234,6 +237,7 @@ class SparseConvolution(SparseModule):
...
@@ -234,6 +237,7 @@ class SparseConvolution(SparseModule):
t
=
time
.
time
()
t
=
time
.
time
()
if
self
.
fused_bn
:
if
self
.
fused_bn
:
raise
NotImplementedError
assert
self
.
bias
is
not
None
assert
self
.
bias
is
not
None
out_features
=
ops
.
fused_indice_conv
(
features
,
self
.
weight
,
out_features
=
ops
.
fused_indice_conv
(
features
,
self
.
weight
,
self
.
bias
,
self
.
bias
,
...
@@ -382,6 +386,34 @@ class SparseConv4d(SparseConvolution):
...
@@ -382,6 +386,34 @@ class SparseConv4d(SparseConvolution):
name
=
name
)
name
=
name
)
class
SparseConvTranspose1d
(
SparseConvolution
):
def
__init__
(
self
,
in_channels
,
out_channels
,
kernel_size
,
stride
=
1
,
padding
=
0
,
dilation
=
1
,
groups
=
1
,
bias
=
True
,
indice_key
=
None
,
algo
=
ops
.
ConvAlgo
.
Native
,
name
=
None
):
super
(
SparseConvTranspose1d
,
self
).
__init__
(
1
,
in_channels
,
out_channels
,
kernel_size
,
stride
,
padding
,
dilation
,
groups
,
bias
,
transposed
=
True
,
indice_key
=
indice_key
,
algo
=
algo
,
name
=
name
)
class
SparseConvTranspose2d
(
SparseConvolution
):
class
SparseConvTranspose2d
(
SparseConvolution
):
def
__init__
(
self
,
def
__init__
(
self
,
in_channels
,
in_channels
,
...
@@ -437,6 +469,34 @@ class SparseConvTranspose3d(SparseConvolution):
...
@@ -437,6 +469,34 @@ class SparseConvTranspose3d(SparseConvolution):
algo
=
algo
,
algo
=
algo
,
name
=
name
)
name
=
name
)
class
SparseConvTranspose4d
(
SparseConvolution
):
def
__init__
(
self
,
in_channels
,
out_channels
,
kernel_size
,
stride
=
1
,
padding
=
0
,
dilation
=
1
,
groups
=
1
,
bias
=
True
,
indice_key
=
None
,
algo
=
ops
.
ConvAlgo
.
Native
,
name
=
None
):
super
(
SparseConvTranspose4d
,
self
).
__init__
(
4
,
in_channels
,
out_channels
,
kernel_size
,
stride
,
padding
,
dilation
,
groups
,
bias
,
transposed
=
True
,
indice_key
=
indice_key
,
algo
=
algo
,
name
=
name
)
class
SparseInverseConv1d
(
SparseConvolution
):
class
SparseInverseConv1d
(
SparseConvolution
):
def
__init__
(
self
,
def
__init__
(
self
,
in_channels
,
in_channels
,
...
...
spconv/pytorch/modules.py
View file @
62c1496f
...
@@ -143,50 +143,3 @@ class SparseSequential(SparseModule):
...
@@ -143,50 +143,3 @@ class SparseSequential(SparseModule):
input
=
module
(
input
)
input
=
module
(
input
)
return
input
return
input
def
fused
(
self
):
"""don't use this. no effect.
"""
from
spconv.pytorch.conv
import
SparseConvolution
mods
=
[
v
for
k
,
v
in
self
.
_modules
.
items
()]
fused_mods
=
[]
idx
=
0
while
idx
<
len
(
mods
):
if
is_sparse_conv
(
mods
[
idx
]):
if
idx
<
len
(
mods
)
-
1
and
isinstance
(
mods
[
idx
+
1
],
nn
.
BatchNorm1d
):
new_module
=
SparseConvolution
(
ndim
=
mods
[
idx
].
ndim
,
in_channels
=
mods
[
idx
].
in_channels
,
out_channels
=
mods
[
idx
].
out_channels
,
kernel_size
=
mods
[
idx
].
kernel_size
,
stride
=
mods
[
idx
].
stride
,
padding
=
mods
[
idx
].
padding
,
dilation
=
mods
[
idx
].
dilation
,
groups
=
mods
[
idx
].
groups
,
bias
=
True
,
subm
=
mods
[
idx
].
subm
,
output_padding
=
mods
[
idx
].
output_padding
,
transposed
=
mods
[
idx
].
transposed
,
inverse
=
mods
[
idx
].
inverse
,
indice_key
=
mods
[
idx
].
indice_key
,
fused_bn
=
True
,
)
new_module
.
load_state_dict
(
mods
[
idx
].
state_dict
(),
False
)
new_module
.
to
(
mods
[
idx
].
weight
.
device
)
conv
=
new_module
bn
=
mods
[
idx
+
1
]
conv
.
bias
.
data
.
zero_
()
conv
.
weight
.
data
[:]
=
conv
.
weight
.
data
*
bn
.
weight
.
data
/
(
torch
.
sqrt
(
bn
.
running_var
)
+
bn
.
eps
)
conv
.
bias
.
data
[:]
=
(
conv
.
bias
.
data
-
bn
.
running_mean
)
*
bn
.
weight
.
data
/
(
torch
.
sqrt
(
bn
.
running_var
)
+
bn
.
eps
)
+
bn
.
bias
.
data
fused_mods
.
append
(
conv
)
idx
+=
2
else
:
fused_mods
.
append
(
mods
[
idx
])
idx
+=
1
else
:
fused_mods
.
append
(
mods
[
idx
])
idx
+=
1
return
SparseSequential
(
*
fused_mods
)
spconv/pytorch/ops.py
View file @
62c1496f
...
@@ -11,7 +11,7 @@
...
@@ -11,7 +11,7 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# See the License for the specific language governing permissions and
# limitations under the License.
# limitations under the License.
import
functools
from
enum
import
Enum
from
enum
import
Enum
from
cumm
import
tensorview
as
tv
from
cumm
import
tensorview
as
tv
from
cumm.gemm.algospec.core
import
ShuffleStrideType
from
cumm.gemm.algospec.core
import
ShuffleStrideType
...
@@ -23,7 +23,7 @@ from spconv.algo import AlgoHint, ConvAlgo
...
@@ -23,7 +23,7 @@ from spconv.algo import AlgoHint, ConvAlgo
from
typing
import
List
,
Union
from
typing
import
List
,
Union
from
spconv.pytorch.cppcore
import
torch_tensor_to_tv
,
get_current_stream
from
spconv.pytorch.cppcore
import
torch_tensor_to_tv
,
get_current_stream
from
spconv.core_cc.csrc.sparse.all
import
SpconvOps
from
spconv.core_cc.csrc.sparse.all
import
SpconvOps
from
spconv.algo
import
GEMM
# , GATHER, SCATTER
from
spconv.algo
import
GEMM
# , GATHER, SCATTER
import
time
import
time
from
spconv.constants
import
FILTER_HWIO
from
spconv.constants
import
FILTER_HWIO
...
@@ -58,26 +58,17 @@ def get_indice_pairs(indices: torch.Tensor,
...
@@ -58,26 +58,17 @@ def get_indice_pairs(indices: torch.Tensor,
batch_size
:
int
,
batch_size
:
int
,
spatial_shape
:
List
[
int
],
spatial_shape
:
List
[
int
],
algo
:
ConvAlgo
,
algo
:
ConvAlgo
,
ksize
:
Union
[
int
,
List
[
int
]
]
,
ksize
:
List
[
int
],
stride
:
Union
[
int
,
List
[
int
]
]
,
stride
:
List
[
int
],
padding
:
Union
[
int
,
List
[
int
]
]
,
padding
:
List
[
int
],
dilation
:
Union
[
int
,
List
[
int
]
]
,
dilation
:
List
[
int
],
out_padding
:
Union
[
int
,
List
[
int
]
]
,
out_padding
:
List
[
int
],
subm
:
bool
=
False
,
subm
:
bool
=
False
,
transpose
:
bool
=
False
):
transpose
:
bool
=
False
):
# torch.cuda.synchronize()
# t = time.time()
ndim
=
indices
.
shape
[
1
]
-
1
ndim
=
indices
.
shape
[
1
]
-
1
if
not
isinstance
(
ksize
,
(
list
,
tuple
)):
kv
:
int
=
functools
.
reduce
(
lambda
x
,
y
:
x
*
y
,
ksize
,
1
)
ksize
=
[
ksize
]
*
ndim
if
not
isinstance
(
stride
,
(
list
,
tuple
)):
stride
=
[
stride
]
*
ndim
if
not
isinstance
(
padding
,
(
list
,
tuple
)):
padding
=
[
padding
]
*
ndim
if
not
isinstance
(
dilation
,
(
list
,
tuple
)):
dilation
=
[
dilation
]
*
ndim
if
not
isinstance
(
out_padding
,
(
list
,
tuple
)):
out_padding
=
[
out_padding
]
*
ndim
kv
:
int
=
int
(
np
.
prod
(
ksize
))
if
not
subm
:
if
not
subm
:
if
transpose
:
if
transpose
:
out_shape
=
get_deconv_output_size
(
spatial_shape
,
ksize
,
stride
,
out_shape
=
get_deconv_output_size
(
spatial_shape
,
ksize
,
stride
,
...
@@ -87,8 +78,9 @@ def get_indice_pairs(indices: torch.Tensor,
...
@@ -87,8 +78,9 @@ def get_indice_pairs(indices: torch.Tensor,
padding
,
dilation
)
padding
,
dilation
)
else
:
else
:
out_shape
=
spatial_shape
out_shape
=
spatial_shape
assert
algo
==
ConvAlgo
.
Native
and
not
transpose
,
"TODO"
assert
algo
==
ConvAlgo
.
Native
,
"TODO"
stream
=
get_current_stream
()
stream
=
get_current_stream
()
pair
=
torch
.
full
((
2
,
kv
,
indices
.
shape
[
0
]),
pair
=
torch
.
full
((
2
,
kv
,
indices
.
shape
[
0
]),
-
1
,
-
1
,
dtype
=
indices
.
dtype
,
dtype
=
indices
.
dtype
,
...
@@ -96,19 +88,20 @@ def get_indice_pairs(indices: torch.Tensor,
...
@@ -96,19 +88,20 @@ def get_indice_pairs(indices: torch.Tensor,
indice_num_per_loc
=
torch
.
zeros
((
kv
,
),
indice_num_per_loc
=
torch
.
zeros
((
kv
,
),
dtype
=
indices
.
dtype
,
dtype
=
indices
.
dtype
,
device
=
indices
.
device
)
device
=
indices
.
device
)
inds_tv
=
torch_tensor_to_tv
(
indices
)
inds_tv
=
torch_tensor_to_tv
(
indices
)
pair_tv
=
torch_tensor_to_tv
(
pair
)
pair_tv
=
torch_tensor_to_tv
(
pair
)
indice_num_per_loc_tv
=
torch_tensor_to_tv
(
indice_num_per_loc
)
indice_num_per_loc_tv
=
torch_tensor_to_tv
(
indice_num_per_loc
)
# torch.cuda.synchronize()
# t = time.time()
if
subm
:
if
subm
:
out_inds
=
indices
out_inds
=
indices
hashdata
=
torch
.
empty
((
out_inds
.
shape
[
0
]
*
2
,
),
hashdata
=
torch
.
empty
((
out_inds
.
shape
[
0
]
*
2
,
),
dtype
=
torch
.
int64
,
dtype
=
torch
.
int64
,
device
=
indices
.
device
)
device
=
indices
.
device
)
out_inds_tv
=
torch_tensor_to_tv
(
out_inds
)
out_inds_tv
=
torch_tensor_to_tv
(
out_inds
)
hashdata_tv
=
torch_tensor_to_tv
(
hashdata
,
dtype
=
tv
.
custom64
)
hashdata_tv
=
torch_tensor_to_tv
(
hashdata
,
dtype
=
tv
.
custom64
)
SpconvOps
.
generate_subm_conv_inds
(
inds_tv
,
SpconvOps
.
generate_subm_conv_inds
(
inds_tv
,
hashdata_tv
,
hashdata_tv
,
pair_tv
,
pair_tv
,
...
@@ -120,27 +113,36 @@ def get_indice_pairs(indices: torch.Tensor,
...
@@ -120,27 +113,36 @@ def get_indice_pairs(indices: torch.Tensor,
dilation
=
dilation
,
dilation
=
dilation
,
stream_int
=
stream
)
stream_int
=
stream
)
# torch.cuda.synchronize()
# torch.cuda.synchronize()
# print("SUBM", time.time() - t)
# print("SUBM INDICE GEN", time.time() - t)
else
:
else
:
indice_pairs_uniq
=
torch
.
empty
((
pair
.
numel
()
//
2
+
1
,
),
indice_pairs_uniq
=
torch
.
empty
((
pair
.
numel
()
//
2
+
1
,
),
dtype
=
indices
.
dtype
,
dtype
=
indices
.
dtype
,
device
=
indices
.
device
)
device
=
indices
.
device
)
indice_pairs_uniq_tv
=
torch_tensor_to_tv
(
indice_pairs_uniq
)
indice_pairs_uniq_tv
=
torch_tensor_to_tv
(
indice_pairs_uniq
)
num_act_out
=
SpconvOps
.
generate_conv_inds_stage1
(
inds_tv
,
SpconvOps
.
generate_conv_inds_stage1
(
inds_tv
,
pair_tv
,
pair_tv
,
indice_pairs_uniq_tv
,
indice_pairs_uniq_tv
,
indice_num_per_loc_tv
,
indice_num_per_loc_tv
,
batch_size
=
batch_size
,
batch_size
=
batch_size
,
output_dims
=
out_shape
,
output_dims
=
out_shape
,
input_dims
=
spatial_shape
,
input_dims
=
spatial_shape
,
ksize
=
ksize
,
ksize
=
ksize
,
stride
=
stride
,
stride
=
stride
,
padding
=
padding
,
padding
=
padding
,
dilation
=
dilation
,
dilation
=
dilation
,
stream_int
=
stream
)
transposed
=
transpose
,
stream_int
=
stream
)
uniq_res
=
indice_pairs_uniq
.
unique
()
num_act_out
=
uniq_res
.
shape
[
0
]
-
1
uniq_res_tv
=
torch_tensor_to_tv
(
uniq_res
)
# num_act_out = SpconvOps.generate_conv_inds_stage1_5(
# indice_pairs_uniq_tv,
# ndim,
# uniq_size=indice_pairs_uniq_tv.size,
# stream_int=stream)
# uniq_res_tv = indice_pairs_uniq_tv.slice_first_axis(0, num_act_out)
out_inds
=
torch
.
empty
((
num_act_out
,
indices
.
shape
[
1
]),
out_inds
=
torch
.
empty
((
num_act_out
,
indices
.
shape
[
1
]),
dtype
=
indices
.
dtype
,
dtype
=
indices
.
dtype
,
device
=
indices
.
device
)
device
=
indices
.
device
)
...
@@ -152,7 +154,7 @@ def get_indice_pairs(indices: torch.Tensor,
...
@@ -152,7 +154,7 @@ def get_indice_pairs(indices: torch.Tensor,
SpconvOps
.
generate_conv_inds_stage2
(
inds_tv
,
SpconvOps
.
generate_conv_inds_stage2
(
inds_tv
,
hashdata_tv
,
hashdata_tv
,
pair_tv
,
pair_tv
,
indice_pairs_
uniq_tv
,
uniq
_res
_tv
,
out_inds_tv
,
out_inds_tv
,
num_out_act
=
num_act_out
,
num_out_act
=
num_act_out
,
batch_size
=
batch_size
,
batch_size
=
batch_size
,
...
@@ -162,11 +164,10 @@ def get_indice_pairs(indices: torch.Tensor,
...
@@ -162,11 +164,10 @@ def get_indice_pairs(indices: torch.Tensor,
stride
=
stride
,
stride
=
stride
,
padding
=
padding
,
padding
=
padding
,
dilation
=
dilation
,
dilation
=
dilation
,
transposed
=
transpose
,
stream_int
=
stream
)
stream_int
=
stream
)
# torch.cuda.synchronize()
# torch.cuda.synchronize()
# print("REGU", time.time() - t)
# print("INDICE GEN", time.time() - t)
return
out_inds
,
pair
,
indice_num_per_loc
return
out_inds
,
pair
,
indice_num_per_loc
...
@@ -228,8 +229,6 @@ def indice_conv(features: torch.Tensor,
...
@@ -228,8 +229,6 @@ def indice_conv(features: torch.Tensor,
c_inds_shape
=
[
nhot_profile
],
c_inds_shape
=
[
nhot_profile
],
hint
=
AlgoHint
.
Fowrard
.
value
)
hint
=
AlgoHint
.
Fowrard
.
value
)
gather_data_tv
=
tv
.
Tensor
()
scatter_data_tv
=
tv
.
Tensor
()
maxnhot
=
max
(
indice_pair_num_cpu
)
maxnhot
=
max
(
indice_pair_num_cpu
)
if
profile_res
is
None
:
if
profile_res
is
None
:
...
@@ -270,36 +269,33 @@ def indice_conv(features: torch.Tensor,
...
@@ -270,36 +269,33 @@ def indice_conv(features: torch.Tensor,
continue
continue
inp_indices
=
pair_in
[
i
].
slice_first_axis
(
0
,
nhot
)
inp_indices
=
pair_in
[
i
].
slice_first_axis
(
0
,
nhot
)
out_indices
=
pair_out
[
i
].
slice_first_axis
(
0
,
nhot
)
out_indices
=
pair_out
[
i
].
slice_first_axis
(
0
,
nhot
)
# inp_indices = torch_tensor_to_tv(inp_indices_th)
# out_indices = torch_tensor_to_tv(out_indices_th)
b
=
filters_tv
[
i
]
b
=
filters_tv
[
i
]
# inp @ filter.T, NC @ KC
# inp @ filter.T, NC @ KC
beta
=
1.0
if
inited
else
0.0
beta
=
1.0
if
inited
else
0.0
algo_desp
=
GEMM
.
run_profile
(
algo_desp
=
GEMM
.
run_profile
(
profile_res
,
profile_res
,
a
,
a
,
b
,
b
,
c
,
c
,
False
,
False
,
False
if
FILTER_HWIO
else
True
,
False
if
FILTER_HWIO
else
True
,
False
,
False
,
arch
=
arch
,
arch
=
arch
,
stream
=
stream
,
stream
=
stream
,
shuffle_type
=
ShuffleStrideType
.
ShuffleAC
,
shuffle_type
=
ShuffleStrideType
.
ShuffleAC
,
a_inds
=
inp_indices
,
a_inds
=
inp_indices
,
c_inds
=
out_indices
,
c_inds
=
out_indices
,
hint
=
AlgoHint
.
Fowrard
.
value
,
hint
=
AlgoHint
.
Fowrard
.
value
,
alpha
=
1.0
,
alpha
=
1.0
,
beta
=
beta
)
beta
=
beta
)
# gather_times += gather_time
# gather_times += gather_time
inited
=
True
inited
=
True
# torch.cuda.synchronize()
# torch.cuda.synchronize()
# print(stream, valid_count, maxnhot, features.shape[0], features.shape[1], out_channel, time.time() - t, total_times, txt)
#
#
print(stream, valid_count, maxnhot, features.shape[0], features.shape[1], out_channel, time.time() - t, total_times, txt)
# print(algo_desp, profile_res.external_gather, profile_res.splitk, features.shape[0], features.shape[1], out_channel, time.time() - t
, total_times
)
#
#
print(algo_desp, profile_res.external_gather, profile_res.splitk, features.shape[0], features.shape[1], out_channel, time.time() - t)
# print(indice_pair_num_cpu)
#
#
print(indice_pair_num_cpu)
# print(time.time() - t)
# print(
"G",
time.time() - t)
return
out_features
return
out_features
...
@@ -316,8 +312,6 @@ def indice_conv_backward(features: torch.Tensor,
...
@@ -316,8 +312,6 @@ def indice_conv_backward(features: torch.Tensor,
inverse
:
bool
=
False
,
inverse
:
bool
=
False
,
subm
:
bool
=
False
,
subm
:
bool
=
False
,
algo
:
ConvAlgo
=
ConvAlgo
.
Native
):
algo
:
ConvAlgo
=
ConvAlgo
.
Native
):
# workspace = torch.empty((10000), dtype=torch.uint8, device=features.device)
# workspace_tv = torch_tensor_to_tv(workspace)
# torch.cuda.synchronize()
# torch.cuda.synchronize()
# t = time.time()
# t = time.time()
...
@@ -400,7 +394,6 @@ def indice_conv_backward(features: torch.Tensor,
...
@@ -400,7 +394,6 @@ def indice_conv_backward(features: torch.Tensor,
c_inds
=
out_indices
,
c_inds
=
out_indices
,
alpha
=
1.0
,
alpha
=
1.0
,
beta
=
0.0
,
beta
=
0.0
,
# scatter_data=scatter_data_tv.slice_first_axis(0, nhot_profile),
hint
=
AlgoHint
.
BackwardInput
.
value
,
hint
=
AlgoHint
.
BackwardInput
.
value
,
stream
=
stream
)
stream
=
stream
)
if
not
FILTER_HWIO
:
if
not
FILTER_HWIO
:
...
@@ -445,7 +438,6 @@ def indice_conv_backward(features: torch.Tensor,
...
@@ -445,7 +438,6 @@ def indice_conv_backward(features: torch.Tensor,
b_inds
=
b_inds_wgrad
,
b_inds
=
b_inds_wgrad
,
alpha
=
1.0
,
alpha
=
1.0
,
beta
=
0.0
,
beta
=
0.0
,
# scatter_data=scatter_data_tv.slice_first_axis(0, nhot_profile),
hint
=
AlgoHint
.
BackwardWeight
.
value
,
hint
=
AlgoHint
.
BackwardWeight
.
value
,
stream
=
stream
)
stream
=
stream
)
# print(profile_res_wgrad.algo_desp, profile_res_wgrad.splitk, min_time)
# print(profile_res_wgrad.algo_desp, profile_res_wgrad.splitk, min_time)
...
@@ -457,21 +449,25 @@ def indice_conv_backward(features: torch.Tensor,
...
@@ -457,21 +449,25 @@ def indice_conv_backward(features: torch.Tensor,
else
:
else
:
b_shape
=
[
maxnhot
,
out_bp_tv
.
dim
(
1
)]
b_shape
=
[
maxnhot
,
out_bp_tv
.
dim
(
1
)]
a_shape
=
[
maxnhot
,
features_tv
.
dim
(
1
)]
a_shape
=
[
maxnhot
,
features_tv
.
dim
(
1
)]
m
,
n
,
k
=
GEMM
.
extract_mnk
(
m
,
n
,
k
=
GEMM
.
extract_mnk
(
a_shape
,
a_shape
,
b_shape
,
profile_res_wgrad
.
algo_desp
.
trans_a
,
b_shape
,
profile_res_wgrad
.
algo_desp
.
trans_b
,
profile_res_wgrad
.
algo_desp
.
trans_a
,
profile_res_wgrad
.
algo_desp
.
trans_c
,
profile_res_wgrad
.
algo_desp
.
trans_b
,
arch
=
arch
,
profile_res_wgrad
.
algo_desp
.
trans_c
,
shuffle_type
=
ShuffleStrideType
.
ShuffleAB
,
arch
=
arch
,
a_inds_shape
=
[
maxnhot
],
shuffle_type
=
ShuffleStrideType
.
ShuffleAB
,
b_inds_shape
=
[
maxnhot
],
a_inds_shape
=
[
maxnhot
],
hint
=
AlgoHint
.
BackwardWeight
.
value
)
b_inds_shape
=
[
maxnhot
],
workspace_size
=
profile_res_wgrad
.
algo_desp
.
query_workspace_size
(
m
,
n
,
k
,
profile_res_wgrad
.
splitk
)
hint
=
AlgoHint
.
BackwardWeight
.
value
)
workspace_size
=
profile_res_wgrad
.
algo_desp
.
query_workspace_size
(
m
,
n
,
k
,
profile_res_wgrad
.
splitk
)
workspace
=
torch
.
Tensor
()
workspace
=
torch
.
Tensor
()
workspace_tv
=
tv
.
Tensor
()
workspace_tv
=
tv
.
Tensor
()
if
workspace_size
>
0
:
if
workspace_size
>
0
:
workspace
=
torch
.
empty
((
workspace_size
,),
dtype
=
torch
.
int8
,
device
=
features
.
device
)
workspace
=
torch
.
empty
((
workspace_size
,
),
dtype
=
torch
.
int8
,
device
=
features
.
device
)
workspace_tv
=
torch_tensor_to_tv
(
workspace
)
workspace_tv
=
torch_tensor_to_tv
(
workspace
)
# print(workspace_size, m, n, k, profile_res_wgrad.splitk)
# print(workspace_size, m, n, k, profile_res_wgrad.splitk)
# torch.cuda.synchronize()
# torch.cuda.synchronize()
...
@@ -538,11 +534,13 @@ def indice_conv_backward(features: torch.Tensor,
...
@@ -538,11 +534,13 @@ def indice_conv_backward(features: torch.Tensor,
# dw_time = time.time() - t
# dw_time = time.time() - t
# # print(dw_time + di_time, di_time, dw_time, profile_res_wgrad.splitk, profile_res_wgrad.algo_desp, dfilters.shape)
# # print(dw_time + di_time, di_time, dw_time, profile_res_wgrad.splitk, profile_res_wgrad.algo_desp, dfilters.shape)
# # print(dw_time + di_time)
# # print(dw_time + di_time)
# print(time.time() - t)
# print(
"BWG",
time.time() - t)
return
(
din
,
dfilters
.
reshape
(
filters_shape
))
return
(
din
,
dfilters
.
reshape
(
filters_shape
))
def
indice_maxpool
(
features
,
indice_pairs
,
indice_pair_num
,
num_activate_out
):
def
indice_maxpool
(
features
,
indice_pairs
,
indice_pair_num
,
num_activate_out
):
# torch.cuda.synchronize()
# t = time.time()
out_channel
=
features
.
shape
[
-
1
]
out_channel
=
features
.
shape
[
-
1
]
out_features
=
torch
.
zeros
((
num_activate_out
,
out_channel
),
out_features
=
torch
.
zeros
((
num_activate_out
,
out_channel
),
dtype
=
features
.
dtype
,
dtype
=
features
.
dtype
,
...
@@ -558,6 +556,9 @@ def indice_maxpool(features, indice_pairs, indice_pair_num, num_activate_out):
...
@@ -558,6 +556,9 @@ def indice_maxpool(features, indice_pairs, indice_pair_num, num_activate_out):
out_indices
=
torch_tensor_to_tv
(
indice_pairs
[
1
][
i
,
:
nhot
])
out_indices
=
torch_tensor_to_tv
(
indice_pairs
[
1
][
i
,
:
nhot
])
SpconvOps
.
maxpool_forward
(
out_features_tv
,
features_tv
,
out_indices
,
SpconvOps
.
maxpool_forward
(
out_features_tv
,
features_tv
,
out_indices
,
inp_indices
,
stream
)
inp_indices
,
stream
)
# torch.cuda.synchronize()
# print("M", time.time() - t)
return
out_features
return
out_features
...
...
spconv/pytorch/pool.py
View file @
62c1496f
...
@@ -142,6 +142,20 @@ class SparseMaxPool(SparseModule):
...
@@ -142,6 +142,20 @@ class SparseMaxPool(SparseModule):
return
out_tensor
return
out_tensor
class
SparseMaxPool1d
(
SparseMaxPool
):
def
__init__
(
self
,
kernel_size
,
stride
=
None
,
padding
=
0
,
dilation
=
1
,
name
=
None
):
super
(
SparseMaxPool1d
,
self
).
__init__
(
1
,
kernel_size
,
stride
,
padding
,
dilation
,
name
=
name
)
class
SparseMaxPool2d
(
SparseMaxPool
):
class
SparseMaxPool2d
(
SparseMaxPool
):
def
__init__
(
self
,
def
__init__
(
self
,
kernel_size
,
kernel_size
,
...
@@ -170,3 +184,17 @@ class SparseMaxPool3d(SparseMaxPool):
...
@@ -170,3 +184,17 @@ class SparseMaxPool3d(SparseMaxPool):
padding
,
padding
,
dilation
,
dilation
,
name
=
name
)
name
=
name
)
class
SparseMaxPool4d
(
SparseMaxPool
):
def
__init__
(
self
,
kernel_size
,
stride
=
None
,
padding
=
0
,
dilation
=
1
,
name
=
None
):
super
(
SparseMaxPool4d
,
self
).
__init__
(
4
,
kernel_size
,
stride
,
padding
,
dilation
,
name
=
name
)
test/aaa.py
View file @
62c1496f
...
@@ -13,20 +13,20 @@
...
@@ -13,20 +13,20 @@
# limitations under the License.
# limitations under the License.
STR
=
"""
STR
=
"""
0.0016176700592041016
BWG 0.0008761882781982422
0.002481698989868164
BWG 0.0008311271667480469
0.002
7854442596435547
BWG
0.002
079486846923828
0.0031723976135253906
BWG 0.002329587936401367
0.0017437934875488281
BWG 0.0025458335876464844
0.002
0503997802734375
BWG
0.002
6700496673583984
0.001399993896484375
BWG 0.002583742141723633
0.00161838531494140
62
BWG 0.00252628326416015
62
0.0007357597351074219
BWG 0.003481149673461914
0.0008492469787597656
BWG 0.003238201141357422
0.0006558895111083984
BWG 0.005095958709716797
0.00
07
99
4
17
4957275391
BWG
0.00
378
99
0
17
333984375
0.000335693359375
BWG 0.003931283950805664
0.000347137451171875
BWG 0.003300189971923828
"""
"""
"""
"""
0.003921985626220703
0.003921985626220703
...
@@ -46,37 +46,86 @@ STR = """
...
@@ -46,37 +46,86 @@ STR = """
0.00030994415283203125
0.00030994415283203125
"""
"""
STR
=
"""
STR1
=
"""
0.0006084442138671875
SUBM 0.00036716461181640625
0.0005354881286621094
G 0.0010955333709716797
0.0012688636779785156
G 0.0010745525360107422
0.0012619495391845703
REGU 0.0006923675537109375
0.002301931381225586
M 0.0005242824554443359
0.0019693374633789062
SUBM 0.0003108978271484375
0.0038712024688720703
G 0.0010905265808105469
0.002872467041015625
G 0.0011067390441894531
0.005068302154541016
REGU 0.00058746337890625
0.0047588348388671875
M 0.0005304813385009766
0.007832765579223633
SUBM 0.0002682209014892578
0.005643367767333984
G 0.0010945796966552734
0.005807161331176758
G 0.0011165142059326172
0.004715442657470703"""
REGU 0.0005419254302978516
M 0.0005164146423339844
SUBM 0.00021505355834960938
G 0.0010805130004882812
G 0.0010516643524169922
REGU 0.00052642822265625
M 0.0004677772521972656
SUBM 0.0002262592315673828
G 0.0010986328125
G 0.0010256767272949219
REGU 0.0005693435668945312
M 0.00048661231994628906
SUBM 0.0002319812774658203
G 0.0011110305786132812
G 0.0011196136474609375
REGU 0.0005295276641845703
M 0.0005729198455810547
SUBM 0.00023889541625976562
G 0.0005326271057128906
G 0.0005140304565429688
"""
"""
0.0004992485046386719
STR2
=
"""
0.0003979206085205078
SUBM 0.0003352165222167969
0.0013720989227294922
G 0.001149892807006836
0.0015933513641357422
G 0.0017066001892089844
0.0027768611907958984
REGU 0.0006349086761474609
0.0024590492248535156
M 0.00048804283142089844
0.004837512969970703
SUBM 0.00029850006103515625
0.004601001739501953
G 0.001767873764038086
0.009881019592285156
G 0.0020656585693359375
0.008889913558959961
REGU 0.0005462169647216797
0.017162084579467773
M 0.0005753040313720703
0.009079217910766602
SUBM 0.0002789497375488281
0.009355545043945312
G 0.0012230873107910156
0.0068836212158203125
G 0.0014438629150390625
REGU 0.0005102157592773438
M 0.0005676746368408203
SUBM 0.00020241737365722656
G 0.00102996826171875
G 0.0011174678802490234
REGU 0.0005424022674560547
M 0.0005102157592773438
SUBM 0.0001976490020751953
G 0.0010385513305664062
G 0.0010204315185546875
REGU 0.0005321502685546875
M 0.00047278404235839844
SUBM 0.00021529197692871094
G 0.0010280609130859375
G 0.0010151863098144531
REGU 0.0004942417144775391
M 0.0004811286926269531
SUBM 0.00020694732666015625
G 0.0005142688751220703
G 0.0005171298980712891
"""
"""
def
_handle_lines
(
s
:
str
):
arr
=
s
.
split
(
" "
)
return
(
arr
[
0
],
float
(
arr
[
-
1
]))
from
cumm.gemm.codeops
import
group_by
def
print_str
(
s
:
str
):
nums
=
list
(
map
(
_handle_lines
,
s
.
strip
().
split
(
"
\n
"
)))
num_dict
=
group_by
(
lambda
x
:
x
[
0
],
nums
)
num_dict_
=
{
k
:
sum
([
vv
[
1
]
for
vv
in
v
])
for
k
,
v
in
num_dict
.
items
()}
print
(
num_dict_
)
nums
=
list
(
map
(
float
,
STR
.
strip
().
split
(
"
\n
"
)))
print_str
(
STR1
)
print
(
sum
(
nums
))
print_str
(
STR2
)
\ No newline at end of file
\ No newline at end of file
test/benchmark.py
View file @
62c1496f
...
@@ -224,25 +224,26 @@ def main():
...
@@ -224,25 +224,26 @@ def main():
# voxels, coors, spatial_shape = waymo_data()
# voxels, coors, spatial_shape = waymo_data()
# with open("/home/yy/test_spconv.pkl", "wb") as f:
# with open("/home/yy/test_spconv.pkl", "wb") as f:
# pickle.dump((voxels, coors, spatial_shape), f)
# pickle.dump((voxels, coors, spatial_shape), f)
with
open
(
"/home/yy/
test_spconv.pkl"
,
"rb"
)
as
f
:
with
open
(
Path
(
__file__
).
parent
/
"data"
/
"
test_spconv.pkl"
,
"rb"
)
as
f
:
(
voxels
,
coors
,
spatial_shape
)
=
pickle
.
load
(
f
)
(
voxels
,
coors
,
spatial_shape
)
=
pickle
.
load
(
f
)
print
(
spatial_shape
)
print
(
spatial_shape
)
print
(
voxels
.
shape
)
print
(
voxels
.
shape
)
# voxels = voxels[:100]
# voxels = voxels[:100]
# coors = coors[:100]
# coors = coors[:100]
voxels_th
=
torch
.
from_numpy
(
voxels
).
cuda
().
float
()
dtype
=
torch
.
float32
voxels_th
=
torch
.
from_numpy
(
voxels
).
cuda
().
to
(
dtype
)
coors_th
=
torch
.
from_numpy
(
coors
).
cuda
().
int
()
coors_th
=
torch
.
from_numpy
(
coors
).
cuda
().
int
()
voxels_th
.
requires_grad
=
True
voxels_th
.
requires_grad
=
True
algo
=
spconv
.
ConvAlgo
.
Native
algo
=
spconv
.
ConvAlgo
.
Native
net
=
Net
(
spatial_shape
,
algo
).
cuda
().
eval
().
float
(
)
net
=
Net
(
spatial_shape
,
algo
).
cuda
().
eval
().
to
(
dtype
)
print
(
coors_th
.
shape
)
print
(
coors_th
.
shape
)
out
=
net
(
voxels_th
,
coors_th
,
1
)
out
=
net
(
voxels_th
,
coors_th
,
1
)
print
(
out
.
spatial_shape
)
print
(
out
.
spatial_shape
)
print
(
voxels
.
mean
(),
voxels
.
max
(),
voxels
.
min
())
print
(
voxels
.
mean
(),
voxels
.
max
(),
voxels
.
min
())
dout
=
np
.
random
.
uniform
(
-
0.2
,
0.2
,
dout
=
np
.
random
.
uniform
(
-
0.2
,
0.2
,
out
.
features
.
shape
).
astype
(
np
.
float32
)
out
.
features
.
shape
).
astype
(
np
.
float32
)
dout_t
=
torch
.
from_numpy
(
dout
).
cuda
()
dout_t
=
torch
.
from_numpy
(
dout
).
cuda
()
.
to
(
dtype
)
print
(
out
.
spatial_shape
,
out
.
features
.
mean
(),
out
.
features
.
max
(),
out
.
features
.
min
())
print
(
out
.
spatial_shape
,
out
.
features
.
mean
(),
out
.
features
.
max
(),
out
.
features
.
min
())
times
=
[]
times
=
[]
...
...
test/data/test_spconv.pkl
0 → 100644
View file @
62c1496f
File added
test/test_conv.py
View file @
62c1496f
...
@@ -381,17 +381,17 @@ class TestSpConv(TestCase):
...
@@ -381,17 +381,17 @@ class TestSpConv(TestCase):
else
:
else
:
filters
=
np
.
random
.
uniform
(
0
,
1
,
size
=
[
k
,
k
,
k
,
OC
,
filters
=
np
.
random
.
uniform
(
0
,
1
,
size
=
[
k
,
k
,
k
,
OC
,
IC
]).
astype
(
np
.
float32
)
IC
]).
astype
(
np
.
float32
)
dtype
=
torch
.
float16
indices_t
=
torch
.
from_numpy
(
indices
).
int
().
to
(
device
)
indices_t
=
torch
.
from_numpy
(
indices
).
int
().
to
(
device
)
features_t
=
torch
.
from_numpy
(
features
).
to
(
device
)
features_t
=
torch
.
from_numpy
(
features
).
to
(
device
)
.
to
(
dtype
)
features_t
.
requires_grad
=
True
features_t
.
requires_grad
=
True
features_dense_t
=
torch
.
from_numpy
(
features_dense
).
to
(
device
)
features_dense_t
=
torch
.
from_numpy
(
features_dense
).
to
(
device
)
.
to
(
dtype
)
features_dense_t
.
requires_grad
=
True
features_dense_t
.
requires_grad
=
True
net
=
SparseConv3dTestTorch
(
1
,
3
,
shape
,
IC
,
OC
,
k
,
s
,
p
,
net
=
SparseConv3dTestTorch
(
1
,
3
,
shape
,
IC
,
OC
,
k
,
s
,
p
,
d
).
to
(
device
)
d
).
to
(
device
)
.
to
(
dtype
)
net_ref
=
Conv3dTestTorch
(
1
,
3
,
shape
,
IC
,
OC
,
k
,
s
,
p
,
net_ref
=
Conv3dTestTorch
(
1
,
3
,
shape
,
IC
,
OC
,
k
,
s
,
p
,
d
).
to
(
device
)
d
).
to
(
device
)
.
to
(
dtype
)
filters_t
=
torch
.
from_numpy
(
filters
).
to
(
device
)
filters_t
=
torch
.
from_numpy
(
filters
).
to
(
device
)
.
to
(
dtype
)
if
FILTER_HWIO
:
if
FILTER_HWIO
:
net_ref
.
net
[
0
].
weight
.
data
[:]
=
filters_t
.
permute
(
4
,
3
,
0
,
1
,
net_ref
.
net
[
0
].
weight
.
data
[:]
=
filters_t
.
permute
(
4
,
3
,
0
,
1
,
2
).
contiguous
()
2
).
contiguous
()
...
@@ -442,6 +442,11 @@ class TestSpConv(TestCase):
...
@@ -442,6 +442,11 @@ class TestSpConv(TestCase):
strides
=
[
2
,
3
]
strides
=
[
2
,
3
]
paddings
=
[
0
,
1
,
2
]
paddings
=
[
0
,
1
,
2
]
dilations
=
[
1
,
2
,
3
]
dilations
=
[
1
,
2
,
3
]
ksizes
=
[
3
]
strides
=
[
1
]
paddings
=
[
0
]
dilations
=
[
1
]
for
dev
,
shape
,
bs
,
IC
,
OC
,
k
,
s
,
p
,
d
in
params_grid
(
for
dev
,
shape
,
bs
,
IC
,
OC
,
k
,
s
,
p
,
d
in
params_grid
(
devices
,
shapes
,
batchsizes
,
in_channels
,
out_channels
,
ksizes
,
devices
,
shapes
,
batchsizes
,
in_channels
,
out_channels
,
ksizes
,
...
@@ -458,8 +463,13 @@ class TestSpConv(TestCase):
...
@@ -458,8 +463,13 @@ class TestSpConv(TestCase):
indices
=
np
.
ascontiguousarray
(
indices
=
np
.
ascontiguousarray
(
sparse_dict
[
"indices"
][:,
[
3
,
0
,
1
,
2
]]).
astype
(
np
.
int32
)
sparse_dict
[
"indices"
][:,
[
3
,
0
,
1
,
2
]]).
astype
(
np
.
int32
)
features_dense
=
sparse_dict
[
"features_dense"
].
astype
(
np
.
float32
)
features_dense
=
sparse_dict
[
"features_dense"
].
astype
(
np
.
float32
)
filters
=
np
.
random
.
uniform
(
0
,
1
,
size
=
[
k
,
k
,
k
,
IC
,
if
FILTER_HWIO
:
OC
]).
astype
(
np
.
float32
)
filters
=
np
.
random
.
uniform
(
0
,
1
,
size
=
[
k
,
k
,
k
,
IC
,
OC
]).
astype
(
np
.
float32
)
else
:
filters
=
np
.
random
.
uniform
(
0
,
1
,
size
=
[
k
,
k
,
k
,
OC
,
IC
]).
astype
(
np
.
float32
)
indices_t
=
torch
.
from_numpy
(
indices
).
int
().
to
(
device
)
indices_t
=
torch
.
from_numpy
(
indices
).
int
().
to
(
device
)
features_t
=
torch
.
from_numpy
(
features
).
to
(
device
)
features_t
=
torch
.
from_numpy
(
features
).
to
(
device
)
features_t
.
requires_grad
=
True
features_t
.
requires_grad
=
True
...
@@ -470,11 +480,20 @@ class TestSpConv(TestCase):
...
@@ -470,11 +480,20 @@ class TestSpConv(TestCase):
net_ref
=
DeConv3dTestTorch
(
1
,
3
,
shape
,
IC
,
OC
,
k
,
s
,
p
,
net_ref
=
DeConv3dTestTorch
(
1
,
3
,
shape
,
IC
,
OC
,
k
,
s
,
p
,
d
).
to
(
device
)
d
).
to
(
device
)
filters_t
=
torch
.
from_numpy
(
filters
).
to
(
device
)
filters_t
=
torch
.
from_numpy
(
filters
).
to
(
device
)
net_ref
.
net
[
0
].
weight
.
data
[:]
=
filters_t
.
permute
(
4
,
3
,
0
,
1
,
print
(
net_ref
.
net
[
0
].
weight
.
shape
)
2
).
contiguous
()
if
FILTER_HWIO
:
net_ref
.
net
[
0
].
weight
.
data
[:]
=
filters_t
.
permute
(
3
,
4
,
0
,
1
,
2
).
contiguous
()
else
:
net_ref
.
net
[
0
].
weight
.
data
[:]
=
filters_t
.
permute
(
4
,
3
,
0
,
1
,
2
).
contiguous
()
net
.
net
[
0
].
weight
.
data
[:]
=
filters_t
net
.
net
[
0
].
weight
.
data
[:]
=
filters_t
out_ref
=
net_ref
(
features_dense_t
)
out_ref
=
net_ref
(
features_dense_t
)
out
=
net
(
features_t
,
indices_t
,
bs
).
dense
()
out
=
net
(
features_t
,
indices_t
,
bs
).
dense
()
out_np
=
out
.
detach
().
cpu
().
numpy
()
out_ref_np
=
out_ref
.
detach
().
cpu
().
numpy
()
self
.
assertAllClose
(
out_np
,
out_ref_np
,
atol
=
1e-4
)
dout
=
np
.
random
.
uniform
(
-
0.2
,
0.2
,
dout
=
np
.
random
.
uniform
(
-
0.2
,
0.2
,
out_ref
.
shape
).
astype
(
features
.
dtype
)
out_ref
.
shape
).
astype
(
features
.
dtype
)
dout_t
=
torch
.
from_numpy
(
dout
).
to
(
device
)
dout_t
=
torch
.
from_numpy
(
dout
).
to
(
device
)
...
@@ -490,12 +509,12 @@ class TestSpConv(TestCase):
...
@@ -490,12 +509,12 @@ class TestSpConv(TestCase):
for
layer
,
layer_ref
in
zip
(
net
.
net
,
net_ref
.
net
):
for
layer
,
layer_ref
in
zip
(
net
.
net
,
net_ref
.
net
):
dw
=
layer
.
weight
.
grad
.
detach
().
cpu
().
numpy
()
dw
=
layer
.
weight
.
grad
.
detach
().
cpu
().
numpy
()
dw_ref
=
layer_ref
.
weight
.
grad
.
detach
().
cpu
().
numpy
()
dw_ref
=
layer_ref
.
weight
.
grad
.
detach
().
cpu
().
numpy
()
dw
=
dw
.
transpose
(
4
,
3
,
0
,
1
,
2
)
if
FILTER_HWIO
:
dw
=
dw
.
transpose
(
3
,
4
,
0
,
1
,
2
)
else
:
dw
=
dw
.
transpose
(
4
,
3
,
0
,
1
,
2
)
self
.
assertAllClose
(
dw
,
dw_ref
,
atol
=
1e-4
)
self
.
assertAllClose
(
dw
,
dw_ref
,
atol
=
1e-4
)
out_np
=
out
.
detach
().
cpu
().
numpy
()
out_ref_np
=
out_ref
.
detach
().
cpu
().
numpy
()
self
.
assertAllClose
(
out_np
,
out_ref_np
,
atol
=
1e-4
)
def
testSpCpConv3d
(
self
):
def
testSpCpConv3d
(
self
):
np
.
random
.
seed
(
484
)
np
.
random
.
seed
(
484
)
...
...
tools/build-wheels.sh
View file @
62c1496f
...
@@ -27,8 +27,8 @@ function repair_wheel {
...
@@ -27,8 +27,8 @@ function repair_wheel {
export
SPCONV_DISABLE_JIT
=
"1"
export
SPCONV_DISABLE_JIT
=
"1"
export
CUMM_CUDA_ARCH_LIST
=
"all"
export
CUMM_CUDA_ARCH_LIST
=
"all"
# Compile wheels, we only support 3.
7
-3.10.
# Compile wheels, we only support 3.
6
-3.10.
#
"/opt/python/cp36-cp36m/bin/pip" wheel /io/ --no-deps -w /io/wheelhouse_tmp
"/opt/python/cp36-cp36m/bin/pip"
wheel /io/
--no-deps
-w
/io/wheelhouse_tmp
"/opt/python/cp37-cp37m/bin/pip"
wheel /io/
--no-deps
-w
/io/wheelhouse_tmp
"/opt/python/cp37-cp37m/bin/pip"
wheel /io/
--no-deps
-w
/io/wheelhouse_tmp
"/opt/python/cp38-cp38/bin/pip"
wheel /io/
--no-deps
-w
/io/wheelhouse_tmp
"/opt/python/cp38-cp38/bin/pip"
wheel /io/
--no-deps
-w
/io/wheelhouse_tmp
"/opt/python/cp39-cp39/bin/pip"
wheel /io/
--no-deps
-w
/io/wheelhouse_tmp
"/opt/python/cp39-cp39/bin/pip"
wheel /io/
--no-deps
-w
/io/wheelhouse_tmp
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment