Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
one
spconv
Commits
591bbc87
Commit
591bbc87
authored
Nov 28, 2021
by
yan.yan
Browse files
fix strange windows problem
parent
c8df384c
Changes
4
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
99 additions
and
50 deletions
+99
-50
CHANGELOG.md
CHANGELOG.md
+4
-0
spconv/csrc/hash/core.py
spconv/csrc/hash/core.py
+78
-47
spconv/pytorch/functional.py
spconv/pytorch/functional.py
+16
-2
version.txt
version.txt
+1
-1
No files found.
CHANGELOG.md
View file @
591bbc87
# Changelog
# Changelog
## [2.1.16] - 2021-11-28
### Fixed
-
Fix strange compile problem in windows
## [2.1.15] - 2021-11-28
## [2.1.15] - 2021-11-28
### Fixed
### Fixed
-
Fix missing pccm.Class in setup.py
-
Fix missing pccm.Class in setup.py
...
...
spconv/csrc/hash/core.py
View file @
591bbc87
...
@@ -14,9 +14,11 @@
...
@@ -14,9 +14,11 @@
import
os
import
os
from
pathlib
import
Path
from
pathlib
import
Path
from
typing
import
List
from
cumm.constants
import
CUMM_CPU_ONLY_BUILD
from
cumm.constants
import
CUMM_CPU_ONLY_BUILD
import
pccm
import
pccm
from
cumm
import
dtypes
from
cumm.common
import
(
TensorView
,
TensorViewCPU
,
TensorViewHashKernel
,
from
cumm.common
import
(
TensorView
,
TensorViewCPU
,
TensorViewHashKernel
,
TensorViewKernel
,
TslRobinMap
)
TensorViewKernel
,
TslRobinMap
)
from
spconv.csrc.sparse.cpu_core
import
OMPLib
from
spconv.csrc.sparse.cpu_core
import
OMPLib
...
@@ -26,6 +28,32 @@ if CUMM_CPU_ONLY_BUILD:
...
@@ -26,6 +28,32 @@ if CUMM_CPU_ONLY_BUILD:
else
:
else
:
_member_func
=
pccm
.
cuda
.
member_function
_member_func
=
pccm
.
cuda
.
member_function
def
_dispatch_ints
(
code
:
pccm
.
FunctionCode
,
ints
:
List
[
int
],
var
:
str
):
for
i
,
val
in
enumerate
(
ints
):
if
i
==
0
:
with
code
.
if_
(
f
"
{
var
}
==
{
val
}
"
):
yield
val
else
:
with
code
.
else_if_
(
f
"
{
var
}
==
{
val
}
"
):
yield
val
with
code
.
else_
():
code
.
raw
(
f
"""
TV_THROW_RT_ERR("unknown val
{
var
}
, available:
{
ints
}
")
"""
)
def
_dispatch
(
code
:
pccm
.
FunctionCode
,
dts
:
List
[
dtypes
.
DType
],
var
:
str
):
for
i
,
dtype
in
enumerate
(
dts
):
if
i
==
0
:
with
code
.
if_
(
f
"
{
var
}
== tv::DType(
{
dtype
.
tv_dtype
}
)"
):
yield
dtype
else
:
with
code
.
else_if_
(
f
"
{
var
}
== tv::DType(
{
dtype
.
tv_dtype
}
)"
):
yield
dtype
with
code
.
else_
():
code
.
raw
(
f
"""
TV_THROW_RT_ERR("unknown dtype
{
var
}
, available:
{
dts
}
")
"""
)
class
HashTable
(
pccm
.
Class
,
pccm
.
pybind
.
PybindClassMixin
):
class
HashTable
(
pccm
.
Class
,
pccm
.
pybind
.
PybindClassMixin
):
"""a simple hashtable for both cpu and cuda.
"""a simple hashtable for both cpu and cuda.
...
@@ -107,14 +135,17 @@ class HashTable(pccm.Class, pccm.pybind.PybindClassMixin):
...
@@ -107,14 +135,17 @@ class HashTable(pccm.Class, pccm.pybind.PybindClassMixin):
with
code
.
else_
():
with
code
.
else_
():
code
.
raw
(
f
"""
code
.
raw
(
f
"""
auto custream = reinterpret_cast<cudaStream_t>(stream);
auto custream = reinterpret_cast<cudaStream_t>(stream);
tv::dispatch_int<4, 8>(keys_data.itemsize(), [&](auto IK){{
"""
)
constexpr int IKV = TV_DECLTYPE(IK)::value;
for
k_items
in
_dispatch_ints
(
code
,
[
4
,
8
],
"keys_data.itemsize()"
):
using K = tv::hash::itemsize_to_unsigned_t<IKV>;
code
.
raw
(
f
"""
using K = tv::hash::itemsize_to_unsigned_t<
{
k_items
}
>;
constexpr K kEmptyKey = std::numeric_limits<K>::max();
constexpr K kEmptyKey = std::numeric_limits<K>::max();
K* key_data_ptr = reinterpret_cast<K*>(keys_data.raw_data());
K* key_data_ptr = reinterpret_cast<K*>(keys_data.raw_data());
tv::dispatch_int<4, 8>(values_data.itemsize(), [&](auto IV){{
"""
)
constexpr int IVV = TV_DECLTYPE(IV)::value;
for
v_items
in
_dispatch_ints
(
code
,
[
4
,
8
],
"values_data.itemsize()"
):
using V = tv::hash::itemsize_to_unsigned_t<IVV>;
code
.
raw
(
f
"""
using V = tv::hash::itemsize_to_unsigned_t<
{
v_items
}
>;
V* value_data_ptr = reinterpret_cast<V*>(values_data.raw_data());
V* value_data_ptr = reinterpret_cast<V*>(values_data.raw_data());
using table_t =
using table_t =
tv::hash::LinearHashTableSplit<K, V, tv::hash::Murmur3Hash<K>,
tv::hash::LinearHashTableSplit<K, V, tv::hash::Murmur3Hash<K>,
...
@@ -122,9 +153,7 @@ class HashTable(pccm.Class, pccm.pybind.PybindClassMixin):
...
@@ -122,9 +153,7 @@ class HashTable(pccm.Class, pccm.pybind.PybindClassMixin):
table_t table(key_data_ptr, value_data_ptr, keys_data.dim(0));
table_t table(key_data_ptr, value_data_ptr, keys_data.dim(0));
tv::cuda::Launch launcher(table.size(), custream);
tv::cuda::Launch launcher(table.size(), custream);
launcher(tv::hash::clear_table_split<table_t>, table);
launcher(tv::hash::clear_table_split<table_t>, table);
}});
"""
)
}});
"""
)
return
code
return
code
...
@@ -174,26 +203,29 @@ class HashTable(pccm.Class, pccm.pybind.PybindClassMixin):
...
@@ -174,26 +203,29 @@ class HashTable(pccm.Class, pccm.pybind.PybindClassMixin):
with
code
.
else_
():
with
code
.
else_
():
code
.
raw
(
f
"""
code
.
raw
(
f
"""
auto custream = reinterpret_cast<cudaStream_t>(stream);
auto custream = reinterpret_cast<cudaStream_t>(stream);
tv::dispatch_int<4, 8>(keys_data.itemsize(), [&](auto IK){{
"""
)
constexpr int IKV = TV_DECLTYPE(IK)::value;
for
k_items
in
_dispatch_ints
(
code
,
[
4
,
8
],
"keys_data.itemsize()"
):
using K = tv::hash::itemsize_to_unsigned_t<IKV>;
code
.
raw
(
f
"""
using K = tv::hash::itemsize_to_unsigned_t<
{
k_items
}
>;
constexpr K kEmptyKey = std::numeric_limits<K>::max();
constexpr K kEmptyKey = std::numeric_limits<K>::max();
K* key_data_ptr = reinterpret_cast<K*>(keys_data.raw_data());
K* key_data_ptr = reinterpret_cast<K*>(keys_data.raw_data());
const K* key_ptr = reinterpret_cast<const K*>(keys.raw_data());
const K* key_ptr = reinterpret_cast<const K*>(keys.raw_data());
tv::dispatch_int<4, 8>(values_data.itemsize(), [&](auto IV){{
constexpr int IVV = TV_DECLTYPE(IV)::value;
"""
)
using V = tv::hash::itemsize_to_unsigned_t<IVV>;
for
v_items
in
_dispatch_ints
(
code
,
[
4
,
8
],
"values_data.itemsize()"
):
code
.
raw
(
f
"""
using V = tv::hash::itemsize_to_unsigned_t<
{
v_items
}
>;
V* value_data_ptr = reinterpret_cast<V*>(values_data.raw_data());
V* value_data_ptr = reinterpret_cast<V*>(values_data.raw_data());
const V* value_ptr = reinterpret_cast<const V*>(values.raw_data());
const V* value_ptr = reinterpret_cast<const V*>(values.raw_data());
using table_t =
using table_t =
tv::hash::LinearHashTableSplit<K, V, tv::hash::Murmur3Hash<K>,
tv::hash::LinearHashTableSplit<K, V, tv::hash::Murmur3Hash<K>,
kEmptyKey, false>;
kEmptyKey, false>;
tv::cuda::Launch launcher(N, custream);
tv::cuda::Launch launcher(N, custream);
table_t table(key_data_ptr, value_data_ptr, keys_data.dim(0));
table_t table(key_data_ptr, value_data_ptr, keys_data.dim(0));
launcher(tv::hash::insert_split<table_t>, table, key_ptr, value_ptr, size_t(N));
launcher(tv::hash::insert_split<table_t>, table, key_ptr, value_ptr, size_t(N));
}});
"""
)
}});
"""
)
else
:
else
:
code
.
raw
(
f
"""
code
.
raw
(
f
"""
TV_THROW_RT_ERR("spconv not compiled with cuda, don't support cuda");
TV_THROW_RT_ERR("spconv not compiled with cuda, don't support cuda");
...
@@ -244,17 +276,18 @@ class HashTable(pccm.Class, pccm.pybind.PybindClassMixin):
...
@@ -244,17 +276,18 @@ class HashTable(pccm.Class, pccm.pybind.PybindClassMixin):
with
code
.
else_
():
with
code
.
else_
():
code
.
raw
(
f
"""
code
.
raw
(
f
"""
auto custream = reinterpret_cast<cudaStream_t>(stream);
auto custream = reinterpret_cast<cudaStream_t>(stream);
tv::dispatch_int<4, 8>(keys_data.itemsize(), [&](auto IK){{
"""
)
constexpr int IKV = TV_DECLTYPE(IK)::value;
for
k_items
in
_dispatch_ints
(
code
,
[
4
,
8
],
"keys_data.itemsize()"
):
code
.
raw
(
f
"""
using K = tv::hash::itemsize_to_unsigned_t<
IKV
>;
using K = tv::hash::itemsize_to_unsigned_t<
{
k_items
}
>;
constexpr K kEmptyKey = std::numeric_limits<K>::max();
constexpr K kEmptyKey = std::numeric_limits<K>::max();
K* key_data_ptr = reinterpret_cast<K*>(keys_data.raw_data());
K* key_data_ptr = reinterpret_cast<K*>(keys_data.raw_data());
K* key_ptr = reinterpret_cast<K*>(keys.raw_data());
K* key_ptr = reinterpret_cast<K*>(keys.raw_data());
tv::dispatch_int<4, 8>(values_data.itemsize(), [&](auto IV){{
constexpr int IVV = TV_DECLTYPE(IV)::value;
using V = tv::hash::itemsize_to_unsigned_t<IVV>;
"""
)
for
v_items
in
_dispatch_ints
(
code
,
[
4
,
8
],
"values_data.itemsize()"
):
code
.
raw
(
f
"""
using V = tv::hash::itemsize_to_unsigned_t<
{
v_items
}
>;
V* value_data_ptr = reinterpret_cast<V*>(values_data.raw_data());
V* value_data_ptr = reinterpret_cast<V*>(values_data.raw_data());
V* value_ptr = reinterpret_cast<V*>(values.raw_data());
V* value_ptr = reinterpret_cast<V*>(values.raw_data());
using table_t =
using table_t =
...
@@ -263,9 +296,7 @@ class HashTable(pccm.Class, pccm.pybind.PybindClassMixin):
...
@@ -263,9 +296,7 @@ class HashTable(pccm.Class, pccm.pybind.PybindClassMixin):
tv::cuda::Launch launcher(N, custream);
tv::cuda::Launch launcher(N, custream);
table_t table(key_data_ptr, value_data_ptr, keys_data.dim(0));
table_t table(key_data_ptr, value_data_ptr, keys_data.dim(0));
launcher(tv::hash::query_split<table_t>, table, key_ptr, value_ptr, is_empty_ptr, size_t(N));
launcher(tv::hash::query_split<table_t>, table, key_ptr, value_ptr, is_empty_ptr, size_t(N));
}});
"""
)
}});
"""
)
else
:
else
:
code
.
raw
(
f
"""
code
.
raw
(
f
"""
TV_THROW_RT_ERR("spconv not compiled with cuda, don't support cuda");
TV_THROW_RT_ERR("spconv not compiled with cuda, don't support cuda");
...
@@ -302,15 +333,19 @@ class HashTable(pccm.Class, pccm.pybind.PybindClassMixin):
...
@@ -302,15 +333,19 @@ class HashTable(pccm.Class, pccm.pybind.PybindClassMixin):
code
.
raw
(
f
"""
code
.
raw
(
f
"""
TV_ASSERT_RT_ERR(count.device() == 0, "count must be cuda");
TV_ASSERT_RT_ERR(count.device() == 0, "count must be cuda");
auto custream = reinterpret_cast<cudaStream_t>(stream);
auto custream = reinterpret_cast<cudaStream_t>(stream);
tv::dispatch_int<4, 8>(keys_data.itemsize(), [&](auto IK){{
"""
)
constexpr int IKV = TV_DECLTYPE(IK)::value;
for
k_items
in
_dispatch_ints
(
code
,
[
4
,
8
],
"keys_data.itemsize()"
):
using K = tv::hash::itemsize_to_unsigned_t<IKV>;
code
.
raw
(
f
"""
using K = tv::hash::itemsize_to_unsigned_t<
{
k_items
}
>;
constexpr K kEmptyKey = std::numeric_limits<K>::max();
constexpr K kEmptyKey = std::numeric_limits<K>::max();
auto count_ptr = count.data_ptr<K>();
auto count_ptr = count.data_ptr<K>();
K* key_data_ptr = reinterpret_cast<K*>(keys_data.raw_data());
K* key_data_ptr = reinterpret_cast<K*>(keys_data.raw_data());
tv::dispatch<int32_t, int64_t, uint32_t, uint64_t>(values_data.dtype(), [&](auto IV){{
"""
)
using V = TV_DECLTYPE(IV);
val_dtypes
=
[
dtypes
.
int32
,
dtypes
.
int64
,
dtypes
.
uint32
,
dtypes
.
uint64
]
for
v_dtype
in
_dispatch
(
code
,
val_dtypes
,
"values_data.dtype()"
):
code
.
raw
(
f
"""
using V =
{
v_dtype
}
;
V* value_data_ptr = reinterpret_cast<V*>(values_data.raw_data());
V* value_data_ptr = reinterpret_cast<V*>(values_data.raw_data());
using table_t =
using table_t =
tv::hash::LinearHashTableSplit<K, V, tv::hash::Murmur3Hash<K>,
tv::hash::LinearHashTableSplit<K, V, tv::hash::Murmur3Hash<K>,
...
@@ -318,9 +353,7 @@ class HashTable(pccm.Class, pccm.pybind.PybindClassMixin):
...
@@ -318,9 +353,7 @@ class HashTable(pccm.Class, pccm.pybind.PybindClassMixin):
table_t table(key_data_ptr, value_data_ptr, keys_data.dim(0));
table_t table(key_data_ptr, value_data_ptr, keys_data.dim(0));
tv::cuda::Launch launcher(table.size(), custream);
tv::cuda::Launch launcher(table.size(), custream);
launcher(tv::hash::assign_arange_split<table_t, K>, table, count_ptr);
launcher(tv::hash::assign_arange_split<table_t, K>, table, count_ptr);
}});
"""
)
}});
"""
)
else
:
else
:
code
.
raw
(
f
"""
code
.
raw
(
f
"""
TV_THROW_RT_ERR("spconv not compiled with cuda, don't support cuda");
TV_THROW_RT_ERR("spconv not compiled with cuda, don't support cuda");
...
@@ -389,20 +422,20 @@ class HashTable(pccm.Class, pccm.pybind.PybindClassMixin):
...
@@ -389,20 +422,20 @@ class HashTable(pccm.Class, pccm.pybind.PybindClassMixin):
with
code
.
else_
():
with
code
.
else_
():
code
.
raw
(
f
"""
code
.
raw
(
f
"""
auto custream = reinterpret_cast<cudaStream_t>(stream);
auto custream = reinterpret_cast<cudaStream_t>(stream);
tv::dispatch_int<4, 8>(keys_data.itemsize(), [&](auto IK){{
"""
)
constexpr int IKV = TV_DECLTYPE(IK)::value;
for
k_items
in
_dispatch_ints
(
code
,
[
4
,
8
],
"keys_data.itemsize()"
):
code
.
raw
(
f
"""
using K = tv::hash::itemsize_to_unsigned_t<
IKV
>;
using K = tv::hash::itemsize_to_unsigned_t<
{
k_items
}
>;
auto count_ptr = count.data_ptr<K>();
auto count_ptr = count.data_ptr<K>();
constexpr K kEmptyKey = std::numeric_limits<K>::max();
constexpr K kEmptyKey = std::numeric_limits<K>::max();
K* key_data_ptr = reinterpret_cast<K*>(keys_data.raw_data());
K* key_data_ptr = reinterpret_cast<K*>(keys_data.raw_data());
K* key_ptr = reinterpret_cast<K*>(keys.raw_data());
K* key_ptr = reinterpret_cast<K*>(keys.raw_data());
tv::dispatch_int<4, 8>(values_data.itemsize(), [&](auto IV){{
constexpr int IVV = TV_DECLTYPE(IV)::value;
using V = tv::hash::itemsize_to_unsigned_t<IVV>;
"""
)
for
v_items
in
_dispatch_ints
(
code
,
[
4
,
8
],
"values_data.itemsize()"
):
code
.
raw
(
f
"""
using V = tv::hash::itemsize_to_unsigned_t<
{
v_items
}
>;
V* value_data_ptr = reinterpret_cast<V*>(values_data.raw_data());
V* value_data_ptr = reinterpret_cast<V*>(values_data.raw_data());
V* value_ptr = reinterpret_cast<V*>(values.raw_data());
V* value_ptr = reinterpret_cast<V*>(values.raw_data());
using table_t =
using table_t =
...
@@ -411,9 +444,7 @@ class HashTable(pccm.Class, pccm.pybind.PybindClassMixin):
...
@@ -411,9 +444,7 @@ class HashTable(pccm.Class, pccm.pybind.PybindClassMixin):
tv::cuda::Launch launcher(N, custream);
tv::cuda::Launch launcher(N, custream);
table_t table(key_data_ptr, value_data_ptr, keys_data.dim(0));
table_t table(key_data_ptr, value_data_ptr, keys_data.dim(0));
launcher(tv::hash::iterate_table_split<table_t, K>, table, key_ptr, value_ptr, size_t(N), count_ptr);
launcher(tv::hash::iterate_table_split<table_t, K>, table, key_ptr, value_ptr, size_t(N), count_ptr);
}});
"""
)
}});
"""
)
else
:
else
:
code
.
raw
(
f
"""
code
.
raw
(
f
"""
TV_THROW_RT_ERR("spconv not compiled with cuda, don't support cuda");
TV_THROW_RT_ERR("spconv not compiled with cuda, don't support cuda");
...
...
spconv/pytorch/functional.py
View file @
591bbc87
...
@@ -371,12 +371,25 @@ def _indice_to_scalar(indices: torch.Tensor, shape: List[int]):
...
@@ -371,12 +371,25 @@ def _indice_to_scalar(indices: torch.Tensor, shape: List[int]):
return
scalar_inds
.
contiguous
()
return
scalar_inds
.
contiguous
()
def
sparse_add_hash_based
(
*
tens
:
SparseConvTensor
):
def
sparse_add_hash_based
(
*
tens
:
SparseConvTensor
):
""" sparse add with misaligned indices.
if you use sparse add, the indice_dict will be dropped and impossible
to use inverse.
There is only one situation that keep indices: there is one operand that
its indices is output indices.
"""
table_size
=
0
table_size
=
0
for
ten
in
tens
:
max_num_indices
=
0
max_num_indices_idx
=
0
for
i
,
ten
in
enumerate
(
tens
):
assert
ten
.
spatial_shape
==
tens
[
0
].
spatial_shape
assert
ten
.
spatial_shape
==
tens
[
0
].
spatial_shape
assert
ten
.
batch_size
==
tens
[
0
].
batch_size
assert
ten
.
batch_size
==
tens
[
0
].
batch_size
assert
ten
.
features
.
shape
[
1
]
==
tens
[
0
].
features
.
shape
[
1
]
assert
ten
.
features
.
shape
[
1
]
==
tens
[
0
].
features
.
shape
[
1
]
table_size
+=
ten
.
features
.
shape
[
0
]
table_size
+=
ten
.
features
.
shape
[
0
]
if
max_num_indices
<
ten
.
features
.
shape
[
0
]:
max_num_indices_idx
=
i
max_num_indices
=
ten
.
features
.
shape
[
0
]
first
=
tens
[
0
]
first
=
tens
[
0
]
feat
=
first
.
features
feat
=
first
.
features
shape
=
[
first
.
batch_size
,
*
first
.
spatial_shape
]
shape
=
[
first
.
batch_size
,
*
first
.
spatial_shape
]
...
@@ -399,7 +412,6 @@ def sparse_add_hash_based(*tens: SparseConvTensor):
...
@@ -399,7 +412,6 @@ def sparse_add_hash_based(*tens: SparseConvTensor):
count_val
=
count
.
item
()
count_val
=
count
.
item
()
out_features
=
torch
.
zeros
([
int
(
count_val
),
feat
.
shape
[
1
]],
dtype
=
feat
.
dtype
,
device
=
feat
.
device
)
out_features
=
torch
.
zeros
([
int
(
count_val
),
feat
.
shape
[
1
]],
dtype
=
feat
.
dtype
,
device
=
feat
.
device
)
out_indices
=
torch
.
zeros
([
int
(
count_val
),
first
.
indices
.
shape
[
1
]],
dtype
=
first
.
indices
.
dtype
,
device
=
first
.
indices
.
device
)
out_indices
=
torch
.
zeros
([
int
(
count_val
),
first
.
indices
.
shape
[
1
]],
dtype
=
first
.
indices
.
dtype
,
device
=
first
.
indices
.
device
)
for
ten
,
scalar
in
zip
(
tens
,
scalars
):
for
ten
,
scalar
in
zip
(
tens
,
scalars
):
out_inds
,
_
=
table
.
query
(
scalar
)
out_inds
,
_
=
table
.
query
(
scalar
)
out_inds
=
out_inds
.
long
()
out_inds
=
out_inds
.
long
()
...
@@ -407,6 +419,8 @@ def sparse_add_hash_based(*tens: SparseConvTensor):
...
@@ -407,6 +419,8 @@ def sparse_add_hash_based(*tens: SparseConvTensor):
out_indices
[
out_inds
]
=
ten
.
indices
out_indices
[
out_inds
]
=
ten
.
indices
res
=
SparseConvTensor
(
out_features
,
out_indices
,
first
.
spatial_shape
,
first
.
batch_size
,
res
=
SparseConvTensor
(
out_features
,
out_indices
,
first
.
spatial_shape
,
first
.
batch_size
,
benchmark
=
first
.
benchmark
)
benchmark
=
first
.
benchmark
)
if
count_val
==
max_num_indices
:
res
.
indice_dict
=
tens
[
max_num_indices_idx
].
indice_dict
res
.
benchmark_record
=
first
.
benchmark_record
res
.
benchmark_record
=
first
.
benchmark_record
res
.
_timer
=
first
.
_timer
res
.
_timer
=
first
.
_timer
res
.
thrust_allocator
=
first
.
thrust_allocator
res
.
thrust_allocator
=
first
.
thrust_allocator
...
...
version.txt
View file @
591bbc87
2.1.15
2.1.16
\ No newline at end of file
\ No newline at end of file
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment