Commit 591bbc87 authored by yan.yan's avatar yan.yan
Browse files

fix strange windows problem

parent c8df384c
# Changelog # Changelog
## [2.1.16] - 2021-11-28
### Fixed
- Fix a strange compile problem on Windows
## [2.1.15] - 2021-11-28 ## [2.1.15] - 2021-11-28
### Fixed ### Fixed
- Fix missing pccm.Class in setup.py - Fix missing pccm.Class in setup.py
......
...@@ -14,9 +14,11 @@ ...@@ -14,9 +14,11 @@
import os import os
from pathlib import Path from pathlib import Path
from typing import List
from cumm.constants import CUMM_CPU_ONLY_BUILD from cumm.constants import CUMM_CPU_ONLY_BUILD
import pccm import pccm
from cumm import dtypes
from cumm.common import (TensorView, TensorViewCPU, TensorViewHashKernel, from cumm.common import (TensorView, TensorViewCPU, TensorViewHashKernel,
TensorViewKernel, TslRobinMap) TensorViewKernel, TslRobinMap)
from spconv.csrc.sparse.cpu_core import OMPLib from spconv.csrc.sparse.cpu_core import OMPLib
...@@ -26,6 +28,32 @@ if CUMM_CPU_ONLY_BUILD: ...@@ -26,6 +28,32 @@ if CUMM_CPU_ONLY_BUILD:
else: else:
_member_func = pccm.cuda.member_function _member_func = pccm.cuda.member_function
def _dispatch_ints(code: pccm.FunctionCode, ints: List[int], var: str):
    """Yield each candidate integer inside its own generated branch.

    For every entry of ``ints`` a branch guarded by ``{var} == {val}`` is
    opened on ``code`` (``code.if_`` for the first entry, ``code.else_if_``
    for the rest) and the entry is yielded while that context is still
    open, so whatever the caller emits during the iteration is written
    inside the branch.

    Once the generator is run to exhaustion, a trailing ``code.else_()``
    branch is added that emits a ``TV_THROW_RT_ERR`` listing the accepted
    values.

    Args:
        code: code builder that receives the branches.
        ints: accepted integer values, one branch each.
        var: text of the expression compared against each value.

    Yields:
        Every element of ``ints``, in order.
    """
    for position, candidate in enumerate(ints):
        condition = f"{var} == {candidate}"
        # Only the first candidate starts the chain; the others extend it.
        open_branch = code.if_ if position == 0 else code.else_if_
        with open_branch(condition):
            yield candidate
    # Fallback for a runtime value that matched none of the candidates.
    with code.else_():
        code.raw(f"""
        TV_THROW_RT_ERR("unknown val {var}, available: {ints}")
        """)
def _dispatch(code: pccm.FunctionCode, dts: List[dtypes.DType], var: str):
    """Yield each candidate dtype inside its own generated branch.

    For every entry of ``dts`` a branch guarded by
    ``{var} == tv::DType({dtype.tv_dtype})`` is opened on ``code``
    (``code.if_`` for the first entry, ``code.else_if_`` for the rest) and
    the dtype is yielded while that context is still open, so whatever the
    caller emits during the iteration is written inside the branch.

    Once the generator is run to exhaustion, a trailing ``code.else_()``
    branch is added that emits a ``TV_THROW_RT_ERR`` listing the accepted
    dtypes.

    Args:
        code: code builder that receives the branches.
        dts: accepted dtypes, one branch each.
        var: text of the expression compared against each dtype.

    Yields:
        Every element of ``dts``, in order.
    """
    for position, candidate in enumerate(dts):
        condition = f"{var} == tv::DType({candidate.tv_dtype})"
        # Only the first candidate starts the chain; the others extend it.
        open_branch = code.if_ if position == 0 else code.else_if_
        with open_branch(condition):
            yield candidate
    # Fallback for a runtime dtype that matched none of the candidates.
    with code.else_():
        code.raw(f"""
        TV_THROW_RT_ERR("unknown dtype {var}, available: {dts}")
        """)
class HashTable(pccm.Class, pccm.pybind.PybindClassMixin): class HashTable(pccm.Class, pccm.pybind.PybindClassMixin):
"""a simple hashtable for both cpu and cuda. """a simple hashtable for both cpu and cuda.
...@@ -107,14 +135,17 @@ class HashTable(pccm.Class, pccm.pybind.PybindClassMixin): ...@@ -107,14 +135,17 @@ class HashTable(pccm.Class, pccm.pybind.PybindClassMixin):
with code.else_(): with code.else_():
code.raw(f""" code.raw(f"""
auto custream = reinterpret_cast<cudaStream_t>(stream); auto custream = reinterpret_cast<cudaStream_t>(stream);
tv::dispatch_int<4, 8>(keys_data.itemsize(), [&](auto IK){{ """)
constexpr int IKV = TV_DECLTYPE(IK)::value; for k_items in _dispatch_ints(code, [4, 8], "keys_data.itemsize()"):
using K = tv::hash::itemsize_to_unsigned_t<IKV>; code.raw(f"""
using K = tv::hash::itemsize_to_unsigned_t<{k_items}>;
constexpr K kEmptyKey = std::numeric_limits<K>::max(); constexpr K kEmptyKey = std::numeric_limits<K>::max();
K* key_data_ptr = reinterpret_cast<K*>(keys_data.raw_data()); K* key_data_ptr = reinterpret_cast<K*>(keys_data.raw_data());
tv::dispatch_int<4, 8>(values_data.itemsize(), [&](auto IV){{ """)
constexpr int IVV = TV_DECLTYPE(IV)::value; for v_items in _dispatch_ints(code, [4, 8], "values_data.itemsize()"):
using V = tv::hash::itemsize_to_unsigned_t<IVV>; code.raw(f"""
using V = tv::hash::itemsize_to_unsigned_t<{v_items}>;
V* value_data_ptr = reinterpret_cast<V*>(values_data.raw_data()); V* value_data_ptr = reinterpret_cast<V*>(values_data.raw_data());
using table_t = using table_t =
tv::hash::LinearHashTableSplit<K, V, tv::hash::Murmur3Hash<K>, tv::hash::LinearHashTableSplit<K, V, tv::hash::Murmur3Hash<K>,
...@@ -122,9 +153,7 @@ class HashTable(pccm.Class, pccm.pybind.PybindClassMixin): ...@@ -122,9 +153,7 @@ class HashTable(pccm.Class, pccm.pybind.PybindClassMixin):
table_t table(key_data_ptr, value_data_ptr, keys_data.dim(0)); table_t table(key_data_ptr, value_data_ptr, keys_data.dim(0));
tv::cuda::Launch launcher(table.size(), custream); tv::cuda::Launch launcher(table.size(), custream);
launcher(tv::hash::clear_table_split<table_t>, table); launcher(tv::hash::clear_table_split<table_t>, table);
}}); """)
}});
""")
return code return code
...@@ -174,26 +203,29 @@ class HashTable(pccm.Class, pccm.pybind.PybindClassMixin): ...@@ -174,26 +203,29 @@ class HashTable(pccm.Class, pccm.pybind.PybindClassMixin):
with code.else_(): with code.else_():
code.raw(f""" code.raw(f"""
auto custream = reinterpret_cast<cudaStream_t>(stream); auto custream = reinterpret_cast<cudaStream_t>(stream);
tv::dispatch_int<4, 8>(keys_data.itemsize(), [&](auto IK){{ """)
constexpr int IKV = TV_DECLTYPE(IK)::value; for k_items in _dispatch_ints(code, [4, 8], "keys_data.itemsize()"):
using K = tv::hash::itemsize_to_unsigned_t<IKV>; code.raw(f"""
using K = tv::hash::itemsize_to_unsigned_t<{k_items}>;
constexpr K kEmptyKey = std::numeric_limits<K>::max(); constexpr K kEmptyKey = std::numeric_limits<K>::max();
K* key_data_ptr = reinterpret_cast<K*>(keys_data.raw_data()); K* key_data_ptr = reinterpret_cast<K*>(keys_data.raw_data());
const K* key_ptr = reinterpret_cast<const K*>(keys.raw_data()); const K* key_ptr = reinterpret_cast<const K*>(keys.raw_data());
tv::dispatch_int<4, 8>(values_data.itemsize(), [&](auto IV){{
constexpr int IVV = TV_DECLTYPE(IV)::value; """)
using V = tv::hash::itemsize_to_unsigned_t<IVV>; for v_items in _dispatch_ints(code, [4, 8], "values_data.itemsize()"):
code.raw(f"""
using V = tv::hash::itemsize_to_unsigned_t<{v_items}>;
V* value_data_ptr = reinterpret_cast<V*>(values_data.raw_data()); V* value_data_ptr = reinterpret_cast<V*>(values_data.raw_data());
const V* value_ptr = reinterpret_cast<const V*>(values.raw_data()); const V* value_ptr = reinterpret_cast<const V*>(values.raw_data());
using table_t = using table_t =
tv::hash::LinearHashTableSplit<K, V, tv::hash::Murmur3Hash<K>, tv::hash::LinearHashTableSplit<K, V, tv::hash::Murmur3Hash<K>,
kEmptyKey, false>; kEmptyKey, false>;
tv::cuda::Launch launcher(N, custream); tv::cuda::Launch launcher(N, custream);
table_t table(key_data_ptr, value_data_ptr, keys_data.dim(0)); table_t table(key_data_ptr, value_data_ptr, keys_data.dim(0));
launcher(tv::hash::insert_split<table_t>, table, key_ptr, value_ptr, size_t(N)); launcher(tv::hash::insert_split<table_t>, table, key_ptr, value_ptr, size_t(N));
}}); """)
}});
""")
else: else:
code.raw(f""" code.raw(f"""
TV_THROW_RT_ERR("spconv not compiled with cuda, don't support cuda"); TV_THROW_RT_ERR("spconv not compiled with cuda, don't support cuda");
...@@ -244,17 +276,18 @@ class HashTable(pccm.Class, pccm.pybind.PybindClassMixin): ...@@ -244,17 +276,18 @@ class HashTable(pccm.Class, pccm.pybind.PybindClassMixin):
with code.else_(): with code.else_():
code.raw(f""" code.raw(f"""
auto custream = reinterpret_cast<cudaStream_t>(stream); auto custream = reinterpret_cast<cudaStream_t>(stream);
tv::dispatch_int<4, 8>(keys_data.itemsize(), [&](auto IK){{ """)
constexpr int IKV = TV_DECLTYPE(IK)::value; for k_items in _dispatch_ints(code, [4, 8], "keys_data.itemsize()"):
code.raw(f"""
using K = tv::hash::itemsize_to_unsigned_t<IKV>; using K = tv::hash::itemsize_to_unsigned_t<{k_items}>;
constexpr K kEmptyKey = std::numeric_limits<K>::max(); constexpr K kEmptyKey = std::numeric_limits<K>::max();
K* key_data_ptr = reinterpret_cast<K*>(keys_data.raw_data()); K* key_data_ptr = reinterpret_cast<K*>(keys_data.raw_data());
K* key_ptr = reinterpret_cast<K*>(keys.raw_data()); K* key_ptr = reinterpret_cast<K*>(keys.raw_data());
tv::dispatch_int<4, 8>(values_data.itemsize(), [&](auto IV){{
constexpr int IVV = TV_DECLTYPE(IV)::value;
using V = tv::hash::itemsize_to_unsigned_t<IVV>; """)
for v_items in _dispatch_ints(code, [4, 8], "values_data.itemsize()"):
code.raw(f"""
using V = tv::hash::itemsize_to_unsigned_t<{v_items}>;
V* value_data_ptr = reinterpret_cast<V*>(values_data.raw_data()); V* value_data_ptr = reinterpret_cast<V*>(values_data.raw_data());
V* value_ptr = reinterpret_cast<V*>(values.raw_data()); V* value_ptr = reinterpret_cast<V*>(values.raw_data());
using table_t = using table_t =
...@@ -263,9 +296,7 @@ class HashTable(pccm.Class, pccm.pybind.PybindClassMixin): ...@@ -263,9 +296,7 @@ class HashTable(pccm.Class, pccm.pybind.PybindClassMixin):
tv::cuda::Launch launcher(N, custream); tv::cuda::Launch launcher(N, custream);
table_t table(key_data_ptr, value_data_ptr, keys_data.dim(0)); table_t table(key_data_ptr, value_data_ptr, keys_data.dim(0));
launcher(tv::hash::query_split<table_t>, table, key_ptr, value_ptr, is_empty_ptr, size_t(N)); launcher(tv::hash::query_split<table_t>, table, key_ptr, value_ptr, is_empty_ptr, size_t(N));
}}); """)
}});
""")
else: else:
code.raw(f""" code.raw(f"""
TV_THROW_RT_ERR("spconv not compiled with cuda, don't support cuda"); TV_THROW_RT_ERR("spconv not compiled with cuda, don't support cuda");
...@@ -302,15 +333,19 @@ class HashTable(pccm.Class, pccm.pybind.PybindClassMixin): ...@@ -302,15 +333,19 @@ class HashTable(pccm.Class, pccm.pybind.PybindClassMixin):
code.raw(f""" code.raw(f"""
TV_ASSERT_RT_ERR(count.device() == 0, "count must be cuda"); TV_ASSERT_RT_ERR(count.device() == 0, "count must be cuda");
auto custream = reinterpret_cast<cudaStream_t>(stream); auto custream = reinterpret_cast<cudaStream_t>(stream);
tv::dispatch_int<4, 8>(keys_data.itemsize(), [&](auto IK){{ """)
constexpr int IKV = TV_DECLTYPE(IK)::value; for k_items in _dispatch_ints(code, [4, 8], "keys_data.itemsize()"):
using K = tv::hash::itemsize_to_unsigned_t<IKV>; code.raw(f"""
using K = tv::hash::itemsize_to_unsigned_t<{k_items}>;
constexpr K kEmptyKey = std::numeric_limits<K>::max(); constexpr K kEmptyKey = std::numeric_limits<K>::max();
auto count_ptr = count.data_ptr<K>(); auto count_ptr = count.data_ptr<K>();
K* key_data_ptr = reinterpret_cast<K*>(keys_data.raw_data()); K* key_data_ptr = reinterpret_cast<K*>(keys_data.raw_data());
tv::dispatch<int32_t, int64_t, uint32_t, uint64_t>(values_data.dtype(), [&](auto IV){{ """)
using V = TV_DECLTYPE(IV); val_dtypes = [dtypes.int32, dtypes.int64, dtypes.uint32, dtypes.uint64]
for v_dtype in _dispatch(code, val_dtypes, "values_data.dtype()"):
code.raw(f"""
using V = {v_dtype};
V* value_data_ptr = reinterpret_cast<V*>(values_data.raw_data()); V* value_data_ptr = reinterpret_cast<V*>(values_data.raw_data());
using table_t = using table_t =
tv::hash::LinearHashTableSplit<K, V, tv::hash::Murmur3Hash<K>, tv::hash::LinearHashTableSplit<K, V, tv::hash::Murmur3Hash<K>,
...@@ -318,9 +353,7 @@ class HashTable(pccm.Class, pccm.pybind.PybindClassMixin): ...@@ -318,9 +353,7 @@ class HashTable(pccm.Class, pccm.pybind.PybindClassMixin):
table_t table(key_data_ptr, value_data_ptr, keys_data.dim(0)); table_t table(key_data_ptr, value_data_ptr, keys_data.dim(0));
tv::cuda::Launch launcher(table.size(), custream); tv::cuda::Launch launcher(table.size(), custream);
launcher(tv::hash::assign_arange_split<table_t, K>, table, count_ptr); launcher(tv::hash::assign_arange_split<table_t, K>, table, count_ptr);
}}); """)
}});
""")
else: else:
code.raw(f""" code.raw(f"""
TV_THROW_RT_ERR("spconv not compiled with cuda, don't support cuda"); TV_THROW_RT_ERR("spconv not compiled with cuda, don't support cuda");
...@@ -389,20 +422,20 @@ class HashTable(pccm.Class, pccm.pybind.PybindClassMixin): ...@@ -389,20 +422,20 @@ class HashTable(pccm.Class, pccm.pybind.PybindClassMixin):
with code.else_(): with code.else_():
code.raw(f""" code.raw(f"""
auto custream = reinterpret_cast<cudaStream_t>(stream); auto custream = reinterpret_cast<cudaStream_t>(stream);
tv::dispatch_int<4, 8>(keys_data.itemsize(), [&](auto IK){{ """)
constexpr int IKV = TV_DECLTYPE(IK)::value; for k_items in _dispatch_ints(code, [4, 8], "keys_data.itemsize()"):
code.raw(f"""
using K = tv::hash::itemsize_to_unsigned_t<IKV>; using K = tv::hash::itemsize_to_unsigned_t<{k_items}>;
auto count_ptr = count.data_ptr<K>(); auto count_ptr = count.data_ptr<K>();
constexpr K kEmptyKey = std::numeric_limits<K>::max(); constexpr K kEmptyKey = std::numeric_limits<K>::max();
K* key_data_ptr = reinterpret_cast<K*>(keys_data.raw_data()); K* key_data_ptr = reinterpret_cast<K*>(keys_data.raw_data());
K* key_ptr = reinterpret_cast<K*>(keys.raw_data()); K* key_ptr = reinterpret_cast<K*>(keys.raw_data());
tv::dispatch_int<4, 8>(values_data.itemsize(), [&](auto IV){{
constexpr int IVV = TV_DECLTYPE(IV)::value;
using V = tv::hash::itemsize_to_unsigned_t<IVV>;
""")
for v_items in _dispatch_ints(code, [4, 8], "values_data.itemsize()"):
code.raw(f"""
using V = tv::hash::itemsize_to_unsigned_t<{v_items}>;
V* value_data_ptr = reinterpret_cast<V*>(values_data.raw_data()); V* value_data_ptr = reinterpret_cast<V*>(values_data.raw_data());
V* value_ptr = reinterpret_cast<V*>(values.raw_data()); V* value_ptr = reinterpret_cast<V*>(values.raw_data());
using table_t = using table_t =
...@@ -411,9 +444,7 @@ class HashTable(pccm.Class, pccm.pybind.PybindClassMixin): ...@@ -411,9 +444,7 @@ class HashTable(pccm.Class, pccm.pybind.PybindClassMixin):
tv::cuda::Launch launcher(N, custream); tv::cuda::Launch launcher(N, custream);
table_t table(key_data_ptr, value_data_ptr, keys_data.dim(0)); table_t table(key_data_ptr, value_data_ptr, keys_data.dim(0));
launcher(tv::hash::iterate_table_split<table_t, K>, table, key_ptr, value_ptr, size_t(N), count_ptr); launcher(tv::hash::iterate_table_split<table_t, K>, table, key_ptr, value_ptr, size_t(N), count_ptr);
}}); """)
}});
""")
else: else:
code.raw(f""" code.raw(f"""
TV_THROW_RT_ERR("spconv not compiled with cuda, don't support cuda"); TV_THROW_RT_ERR("spconv not compiled with cuda, don't support cuda");
......
...@@ -371,12 +371,25 @@ def _indice_to_scalar(indices: torch.Tensor, shape: List[int]): ...@@ -371,12 +371,25 @@ def _indice_to_scalar(indices: torch.Tensor, shape: List[int]):
return scalar_inds.contiguous() return scalar_inds.contiguous()
def sparse_add_hash_based(*tens: SparseConvTensor): def sparse_add_hash_based(*tens: SparseConvTensor):
""" sparse add with misaligned indices.
If you use sparse add, the indice_dict will be dropped and it becomes
impossible to use inverse.
There is only one situation in which the indices are kept: the indices of
one operand are exactly the output indices.
"""
table_size = 0 table_size = 0
for ten in tens: max_num_indices = 0
max_num_indices_idx = 0
for i, ten in enumerate(tens):
assert ten.spatial_shape == tens[0].spatial_shape assert ten.spatial_shape == tens[0].spatial_shape
assert ten.batch_size == tens[0].batch_size assert ten.batch_size == tens[0].batch_size
assert ten.features.shape[1] == tens[0].features.shape[1] assert ten.features.shape[1] == tens[0].features.shape[1]
table_size += ten.features.shape[0] table_size += ten.features.shape[0]
if max_num_indices < ten.features.shape[0]:
max_num_indices_idx = i
max_num_indices = ten.features.shape[0]
first = tens[0] first = tens[0]
feat = first.features feat = first.features
shape = [first.batch_size, *first.spatial_shape] shape = [first.batch_size, *first.spatial_shape]
...@@ -399,7 +412,6 @@ def sparse_add_hash_based(*tens: SparseConvTensor): ...@@ -399,7 +412,6 @@ def sparse_add_hash_based(*tens: SparseConvTensor):
count_val = count.item() count_val = count.item()
out_features = torch.zeros([int(count_val), feat.shape[1]], dtype=feat.dtype, device=feat.device) out_features = torch.zeros([int(count_val), feat.shape[1]], dtype=feat.dtype, device=feat.device)
out_indices = torch.zeros([int(count_val), first.indices.shape[1]], dtype=first.indices.dtype, device=first.indices.device) out_indices = torch.zeros([int(count_val), first.indices.shape[1]], dtype=first.indices.dtype, device=first.indices.device)
for ten, scalar in zip(tens, scalars): for ten, scalar in zip(tens, scalars):
out_inds, _ = table.query(scalar) out_inds, _ = table.query(scalar)
out_inds = out_inds.long() out_inds = out_inds.long()
...@@ -407,6 +419,8 @@ def sparse_add_hash_based(*tens: SparseConvTensor): ...@@ -407,6 +419,8 @@ def sparse_add_hash_based(*tens: SparseConvTensor):
out_indices[out_inds] = ten.indices out_indices[out_inds] = ten.indices
res = SparseConvTensor(out_features, out_indices, first.spatial_shape, first.batch_size, res = SparseConvTensor(out_features, out_indices, first.spatial_shape, first.batch_size,
benchmark=first.benchmark) benchmark=first.benchmark)
if count_val == max_num_indices:
res.indice_dict = tens[max_num_indices_idx].indice_dict
res.benchmark_record = first.benchmark_record res.benchmark_record = first.benchmark_record
res._timer = first._timer res._timer = first._timer
res.thrust_allocator = first.thrust_allocator res.thrust_allocator = first.thrust_allocator
......
2.1.15 2.1.16
\ No newline at end of file \ No newline at end of file
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment