Fix strange compile problem on Windows

591bbc87 · yan.yan · c8df384c · 591bbc87 · 591bbc87 · 591bbc87
Commit 591bbc87 authored Nov 28, 2021 by yan.yan
Showing with 99 additions and 50 deletions

CHANGELOG.md CHANGELOG.md +4 -0

spconv/csrc/hash/core.py spconv/csrc/hash/core.py +78 -47

spconv/pytorch/functional.py spconv/pytorch/functional.py +16 -2

version.txt version.txt +1 -1

No files found.
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
 # Changelog
+## [2.1.16] - 2021-11-28
+### Fixed
+- Fix strange compile problem on Windows
 ## [2.1.15] - 2021-11-28
 ### Fixed
 - Fix missing pccm.Class in setup.py

--- a/spconv/csrc/hash/core.py
+++ b/spconv/csrc/hash/core.py
@@ -14,9 +14,11 @@
 import os
 from pathlib import Path
+from typing import List
 from cumm.constants import CUMM_CPU_ONLY_BUILD
 import pccm
+from cumm import dtypes
 from cumm.common import (TensorView, TensorViewCPU, TensorViewHashKernel,
                         TensorViewKernel, TslRobinMap)
 from spconv.csrc.sparse.cpu_core import OMPLib
@@ -26,6 +28,32 @@ if CUMM_CPU_ONLY_BUILD:
 else:
    _member_func = pccm.cuda.member_function
+def _dispatch_ints(code: pccm.FunctionCode, ints: List[int], var: str):  # generator: opens one branch per entry of `ints`, each testing the expression text `var` against that entry, and yields the entry
+    for i, val in enumerate(ints):
+        if i == 0:  # the first entry opens the chain through code.if_
+            with code.if_(f"{var} == {val}"):
+                yield val   # the caller's loop body runs here, while this branch's `with` block is still open
+        else:  # every later entry continues the chain through code.else_if_
+            with code.else_if_(f"{var} == {val}"):
+                yield val 
+    with code.else_():  # reached only after the caller has consumed every entry; adds the fallback branch
+        code.raw(f"""
+        TV_THROW_RT_ERR("unknown val {var}, available: {ints}")
+        """)  # NOTE(review): {var} and {ints} are interpolated when this Python runs, so the message carries the expression text, not its runtime value
+def _dispatch(code: pccm.FunctionCode, dts: List[dtypes.DType], var: str):  # generator: opens one branch per entry of `dts`, each testing the expression text `var` against tv::DType(<entry>.tv_dtype), and yields the entry
+    for i, dtype in enumerate(dts):
+        if i == 0:  # the first dtype opens the chain through code.if_
+            with code.if_(f"{var} == tv::DType({dtype.tv_dtype})"):
+                yield dtype   # the caller's loop body runs here, while this branch's `with` block is still open
+        else:  # every later dtype continues the chain through code.else_if_
+            with code.else_if_(f"{var} == tv::DType({dtype.tv_dtype})"):
+                yield dtype 
+    with code.else_():  # reached only after the caller has consumed every dtype; adds the fallback branch
+        code.raw(f"""
+        TV_THROW_RT_ERR("unknown dtype {var}, available: {dts}")
+        """)  # NOTE(review): {dts} is rendered with the Python formatting of the DType list at generation time; confirm that text is safe inside a C++ string literal
 class HashTable(pccm.Class, pccm.pybind.PybindClassMixin):
    """a simple hashtable for both cpu and cuda.
@@ -107,14 +135,17 @@ class HashTable(pccm.Class, pccm.pybind.PybindClassMixin):
            with code.else_():
                code.raw(f"""
                auto custream = reinterpret_cast<cudaStream_t>(stream);
-                tv::dispatch_int<4, 8>(keys_data.itemsize(), [&](auto IK){{
+                """)
-                    constexpr int IKV = TV_DECLTYPE(IK)::value;
+                for k_items in _dispatch_ints(code, [4, 8], "keys_data.itemsize()"):
-                    using K = tv::hash::itemsize_to_unsigned_t<IKV>;
+                    code.raw(f"""
+                    using K = tv::hash::itemsize_to_unsigned_t<{k_items}>;
                    constexpr K kEmptyKey = std::numeric_limits<K>::max();
                    K* key_data_ptr = reinterpret_cast<K*>(keys_data.raw_data());
-                    tv::dispatch_int<4, 8>(values_data.itemsize(), [&](auto IV){{
+                    """)
-                        constexpr int IVV = TV_DECLTYPE(IV)::value;
+                    for v_items in _dispatch_ints(code, [4, 8], "values_data.itemsize()"):
-                        using V = tv::hash::itemsize_to_unsigned_t<IVV>;
+                        code.raw(f"""
+                        using V = tv::hash::itemsize_to_unsigned_t<{v_items}>;
                        V* value_data_ptr = reinterpret_cast<V*>(values_data.raw_data());
                        using table_t =
                            tv::hash::LinearHashTableSplit<K, V, tv::hash::Murmur3Hash<K>,
@@ -122,9 +153,7 @@ class HashTable(pccm.Class, pccm.pybind.PybindClassMixin):
                        table_t table(key_data_ptr, value_data_ptr, keys_data.dim(0));
                        tv::cuda::Launch launcher(table.size(), custream);
                        launcher(tv::hash::clear_table_split<table_t>, table);
-                    }});
+                        """)
-                }});
-                """)
        return code 
@@ -174,26 +203,29 @@ class HashTable(pccm.Class, pccm.pybind.PybindClassMixin):
            with code.else_():
                code.raw(f"""
                auto custream = reinterpret_cast<cudaStream_t>(stream);
-                tv::dispatch_int<4, 8>(keys_data.itemsize(), [&](auto IK){{
+                """)
-                    constexpr int IKV = TV_DECLTYPE(IK)::value;
+                for k_items in _dispatch_ints(code, [4, 8], "keys_data.itemsize()"):
-                    using K = tv::hash::itemsize_to_unsigned_t<IKV>;
+                    code.raw(f"""
+                    using K = tv::hash::itemsize_to_unsigned_t<{k_items}>;
                    constexpr K kEmptyKey = std::numeric_limits<K>::max();
                    K* key_data_ptr = reinterpret_cast<K*>(keys_data.raw_data());
                    const K* key_ptr = reinterpret_cast<const K*>(keys.raw_data());
-                    tv::dispatch_int<4, 8>(values_data.itemsize(), [&](auto IV){{
-                        constexpr int IVV = TV_DECLTYPE(IV)::value;
+                    """)
-                        using V = tv::hash::itemsize_to_unsigned_t<IVV>;
+                    for v_items in _dispatch_ints(code, [4, 8], "values_data.itemsize()"):
+                        code.raw(f"""
+                        using V = tv::hash::itemsize_to_unsigned_t<{v_items}>;
                        V* value_data_ptr = reinterpret_cast<V*>(values_data.raw_data());
                        const V* value_ptr = reinterpret_cast<const V*>(values.raw_data());
                        using table_t =
                            tv::hash::LinearHashTableSplit<K, V, tv::hash::Murmur3Hash<K>,
                                                        kEmptyKey, false>;
                        tv::cuda::Launch launcher(N, custream);
                        table_t table(key_data_ptr, value_data_ptr, keys_data.dim(0));
                        launcher(tv::hash::insert_split<table_t>, table, key_ptr, value_ptr, size_t(N));
-                    }});
+                        """)
-                }});
-                """)
        else:
            code.raw(f"""
            TV_THROW_RT_ERR("spconv not compiled with cuda, don't support cuda");
@@ -244,17 +276,18 @@ class HashTable(pccm.Class, pccm.pybind.PybindClassMixin):
            with code.else_():
                code.raw(f"""
                auto custream = reinterpret_cast<cudaStream_t>(stream);
-                tv::dispatch_int<4, 8>(keys_data.itemsize(), [&](auto IK){{
+                """)
-                    constexpr int IKV = TV_DECLTYPE(IK)::value;
+                for k_items in _dispatch_ints(code, [4, 8], "keys_data.itemsize()"):
+                    code.raw(f"""
-                    using K = tv::hash::itemsize_to_unsigned_t<IKV>;
+                    using K = tv::hash::itemsize_to_unsigned_t<{k_items}>;
                    constexpr K kEmptyKey = std::numeric_limits<K>::max();
                    K* key_data_ptr = reinterpret_cast<K*>(keys_data.raw_data());
                    K* key_ptr = reinterpret_cast<K*>(keys.raw_data());
-                    tv::dispatch_int<4, 8>(values_data.itemsize(), [&](auto IV){{
-                        constexpr int IVV = TV_DECLTYPE(IV)::value;
-                        using V = tv::hash::itemsize_to_unsigned_t<IVV>;
+                    """)
+                    for v_items in _dispatch_ints(code, [4, 8], "values_data.itemsize()"):
+                        code.raw(f"""
+                        using V = tv::hash::itemsize_to_unsigned_t<{v_items}>;
                        V* value_data_ptr = reinterpret_cast<V*>(values_data.raw_data());
                        V* value_ptr = reinterpret_cast<V*>(values.raw_data());
                        using table_t =
@@ -263,9 +296,7 @@ class HashTable(pccm.Class, pccm.pybind.PybindClassMixin):
                        tv::cuda::Launch launcher(N, custream);
                        table_t table(key_data_ptr, value_data_ptr, keys_data.dim(0));
                        launcher(tv::hash::query_split<table_t>, table, key_ptr, value_ptr, is_empty_ptr, size_t(N));
-                    }});
+                        """)
-                }});
-                """)
        else:
            code.raw(f"""
            TV_THROW_RT_ERR("spconv not compiled with cuda, don't support cuda");
@@ -302,15 +333,19 @@ class HashTable(pccm.Class, pccm.pybind.PybindClassMixin):
                code.raw(f"""
                TV_ASSERT_RT_ERR(count.device() == 0, "count must be cuda");
                auto custream = reinterpret_cast<cudaStream_t>(stream);
-                tv::dispatch_int<4, 8>(keys_data.itemsize(), [&](auto IK){{
+                """)
-                    constexpr int IKV = TV_DECLTYPE(IK)::value;
+                for k_items in _dispatch_ints(code, [4, 8], "keys_data.itemsize()"):
-                    using K = tv::hash::itemsize_to_unsigned_t<IKV>;
+                    code.raw(f"""
+                    using K = tv::hash::itemsize_to_unsigned_t<{k_items}>;
                    constexpr K kEmptyKey = std::numeric_limits<K>::max();
                    auto count_ptr = count.data_ptr<K>();
                    K* key_data_ptr = reinterpret_cast<K*>(keys_data.raw_data());
-                    tv::dispatch<int32_t, int64_t, uint32_t, uint64_t>(values_data.dtype(), [&](auto IV){{
+                    """)
-                        using V = TV_DECLTYPE(IV);
+                    val_dtypes = [dtypes.int32, dtypes.int64, dtypes.uint32, dtypes.uint64]
+                    for v_dtype in _dispatch(code, val_dtypes, "values_data.dtype()"):
+                        code.raw(f"""
+                        using V = {v_dtype};
                        V* value_data_ptr = reinterpret_cast<V*>(values_data.raw_data());
                        using table_t =
                            tv::hash::LinearHashTableSplit<K, V, tv::hash::Murmur3Hash<K>,
@@ -318,9 +353,7 @@ class HashTable(pccm.Class, pccm.pybind.PybindClassMixin):
                        table_t table(key_data_ptr, value_data_ptr, keys_data.dim(0));
                        tv::cuda::Launch launcher(table.size(), custream);
                        launcher(tv::hash::assign_arange_split<table_t, K>, table, count_ptr);
-                    }});
+                        """)
-                }});
-                """)
        else:
            code.raw(f"""
            TV_THROW_RT_ERR("spconv not compiled with cuda, don't support cuda");
@@ -389,20 +422,20 @@ class HashTable(pccm.Class, pccm.pybind.PybindClassMixin):
            with code.else_():
                code.raw(f"""
                auto custream = reinterpret_cast<cudaStream_t>(stream);
-                tv::dispatch_int<4, 8>(keys_data.itemsize(), [&](auto IK){{
+                """)
-                    constexpr int IKV = TV_DECLTYPE(IK)::value;
+                for k_items in _dispatch_ints(code, [4, 8], "keys_data.itemsize()"):
+                    code.raw(f"""
-                    using K = tv::hash::itemsize_to_unsigned_t<IKV>;
+                    using K = tv::hash::itemsize_to_unsigned_t<{k_items}>;
                    auto count_ptr = count.data_ptr<K>();
                    constexpr K kEmptyKey = std::numeric_limits<K>::max();
                    K* key_data_ptr = reinterpret_cast<K*>(keys_data.raw_data());
                    K* key_ptr = reinterpret_cast<K*>(keys.raw_data());
-                    tv::dispatch_int<4, 8>(values_data.itemsize(), [&](auto IV){{
-                        constexpr int IVV = TV_DECLTYPE(IV)::value;
-                        using V = tv::hash::itemsize_to_unsigned_t<IVV>;
+                    """)
+                    for v_items in _dispatch_ints(code, [4, 8], "values_data.itemsize()"):
+                        code.raw(f"""
+                        using V = tv::hash::itemsize_to_unsigned_t<{v_items}>;
                        V* value_data_ptr = reinterpret_cast<V*>(values_data.raw_data());
                        V* value_ptr = reinterpret_cast<V*>(values.raw_data());
                        using table_t =
@@ -411,9 +444,7 @@ class HashTable(pccm.Class, pccm.pybind.PybindClassMixin):
                        tv::cuda::Launch launcher(N, custream);
                        table_t table(key_data_ptr, value_data_ptr, keys_data.dim(0));
                        launcher(tv::hash::iterate_table_split<table_t, K>, table, key_ptr, value_ptr, size_t(N), count_ptr);
-                    }});
+                        """)
-                }});
-                """)
        else:
            code.raw(f"""
            TV_THROW_RT_ERR("spconv not compiled with cuda, don't support cuda");

--- a/spconv/pytorch/functional.py
+++ b/spconv/pytorch/functional.py
@@ -371,12 +371,25 @@ def _indice_to_scalar(indices: torch.Tensor, shape: List[int]):
    return scalar_inds.contiguous()
 def sparse_add_hash_based(*tens: SparseConvTensor):
+    """Sparse add for tensors with misaligned indices.
+    After a sparse add the indice_dict is normally dropped, so inverse
+    can no longer be used on the result.
+    The only case that keeps it: one operand has as many indices as the
+    output, so that operand's indice_dict is reused for the result.
+    """
    table_size = 0
-    for ten in tens:
+    max_num_indices = 0
+    max_num_indices_idx = 0
+    for i, ten in enumerate(tens):
        assert ten.spatial_shape == tens[0].spatial_shape
        assert ten.batch_size == tens[0].batch_size
        assert ten.features.shape[1] == tens[0].features.shape[1]
        table_size += ten.features.shape[0]
+        if max_num_indices < ten.features.shape[0]:
+            max_num_indices_idx = i
+            max_num_indices = ten.features.shape[0]
    first = tens[0]
    feat = first.features
    shape = [first.batch_size, *first.spatial_shape]
@@ -399,7 +412,6 @@ def sparse_add_hash_based(*tens: SparseConvTensor):
    count_val = count.item()
    out_features = torch.zeros([int(count_val), feat.shape[1]], dtype=feat.dtype, device=feat.device)
    out_indices = torch.zeros([int(count_val), first.indices.shape[1]], dtype=first.indices.dtype, device=first.indices.device)
    for ten, scalar in zip(tens, scalars):
        out_inds, _ = table.query(scalar)
        out_inds = out_inds.long()
@@ -407,6 +419,8 @@ def sparse_add_hash_based(*tens: SparseConvTensor):
        out_indices[out_inds] = ten.indices
    res = SparseConvTensor(out_features, out_indices, first.spatial_shape, first.batch_size, 
        benchmark=first.benchmark)
+    if count_val == max_num_indices:
+        res.indice_dict = tens[max_num_indices_idx].indice_dict
    res.benchmark_record = first.benchmark_record
    res._timer = first._timer 
    res.thrust_allocator = first.thrust_allocator

--- a/version.txt
+++ b/version.txt
-2.1.15
+2.1.16
\ No newline at end of file