spconv v1.1 release:

1. add cuda hash support for cuda indice generation. 2. use hash table instead of dense table in CPU code. 3. add CPU-only build support.

spconv v1.1 release:
1. add cuda hash support for cuda indice generation. 2. use hash table instead of dense table in CPU code. 3. add CPU-only build support.
a6ae8967 · traveller59 · 0757c45b · a6ae8967 · a6ae8967 · a6ae8967
Commit a6ae8967 authored May 24, 2019 by traveller59
20 changed files
--- a/include/tsl/robin_growth_policy.h
+++ b/include/tsl/robin_growth_policy.h
+/**
+ * MIT License
+ * 
+ * Copyright (c) 2017 Tessil
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ * 
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef TSL_ROBIN_GROWTH_POLICY_H
+#define TSL_ROBIN_GROWTH_POLICY_H 
+#include <algorithm>
+#include <array>
+#include <climits>
+#include <cmath>
+#include <cstddef>
+#include <iterator>
+#include <limits>
+#include <ratio>
+#include <stdexcept>
+/**
+ * tsl_rh_assert(expr) forwards to assert(expr) when TSL_DEBUG is defined and expands to a
+ * no-op expression otherwise. <cassert> is pulled in here so that the macro does not depend
+ * on the including file having already included it.
+ */
+#ifdef TSL_DEBUG
+#    include <cassert>
+#    define tsl_rh_assert(expr) assert(expr)
+#else
+#    define tsl_rh_assert(expr) (static_cast<void>(0))
+#endif
+/**
+ * If exceptions are enabled, throw the exception passed in parameter, otherwise call std::terminate.
+ *
+ * Without exceptions, the message is written to stderr before terminating unless NDEBUG is defined.
+ * The message is printed through a "%s" format so that it is never interpreted as a format string.
+ */
+#if (defined(__cpp_exceptions) || defined(__EXCEPTIONS) || (defined (_MSC_VER) && defined (_CPPUNWIND))) && !defined(TSL_NO_EXCEPTIONS)
+#    define TSL_RH_THROW_OR_TERMINATE(ex, msg) throw ex(msg)
+#else
+#    include <exception>
+#    ifdef NDEBUG
+#        define TSL_RH_THROW_OR_TERMINATE(ex, msg) std::terminate()
+#    else
+#        include <cstdio>
+#        define TSL_RH_THROW_OR_TERMINATE(ex, msg) do { std::fprintf(stderr, "%s", msg); std::terminate(); } while(0)
+#    endif
+#endif
+#if defined(__GNUC__) || defined(__clang__)
+#    define TSL_RH_LIKELY(exp) (__builtin_expect(!!(exp), true))
+#else
+#    define TSL_RH_LIKELY(exp) (exp)
+#endif
+namespace tsl {
+namespace rh {
+/**
+ * Growth policy that keeps the number of buckets at a power of two and multiplies it by
+ * GrowthFactor each time the table grows. With a power-of-two bucket count, mapping a hash
+ * to a bucket is a single mask operation rather than a modulo.
+ * 
+ * GrowthFactor must be a power of two >= 2.
+ */
+template<std::size_t GrowthFactor>
+class power_of_two_growth_policy {
+public:
+    /**
+     * Called when the hash table is created and on each rehash. The requested number of buckets
+     * travels in and out through min_bucket_count_in_out: it is a minimum which the policy may
+     * round up to the next power of two, but never lowers.
+     *
+     * A request of 0 is left at 0; bucket_for_hash then always returns 0.
+     *
+     * A request above max_bucket_count() is reported as std::length_error through
+     * TSL_RH_THROW_OR_TERMINATE.
+     */
+    explicit power_of_two_growth_policy(std::size_t& min_bucket_count_in_out) {
+        if(min_bucket_count_in_out > max_bucket_count()) {
+            TSL_RH_THROW_OR_TERMINATE(std::length_error, "The hash table exceeds its maxmimum size.");
+        }
+        if(min_bucket_count_in_out == 0) {
+            m_mask = 0;
+            return;
+        }
+        const std::size_t rounded_count = round_up_to_power_of_two(min_bucket_count_in_out);
+        min_bucket_count_in_out = rounded_count;
+        m_mask = rounded_count - 1;
+    }
+    /**
+     * Map a hash to its bucket in [0, bucket_count()) by masking its low bits.
+     * With a bucket count of 0 the mask is 0, so the result is always 0.
+     */
+    std::size_t bucket_for_hash(std::size_t hash) const noexcept {
+        return m_mask & hash;
+    }
+    /**
+     * Bucket count to use for the next growth: the current count times GrowthFactor.
+     * Reports std::length_error if that would exceed max_bucket_count().
+     */
+    std::size_t next_bucket_count() const {
+        const std::size_t current_count = m_mask + 1;
+        if(current_count > max_bucket_count() / GrowthFactor) {
+            TSL_RH_THROW_OR_TERMINATE(std::length_error, "The hash table exceeds its maxmimum size.");
+        }
+        return current_count * GrowthFactor;
+    }
+    /**
+     * Largest bucket count the policy supports: the largest power of two representable in std::size_t.
+     */
+    std::size_t max_bucket_count() const {
+        return (std::numeric_limits<std::size_t>::max() / 2) + 1;
+    }
+    /**
+     * Put the policy back in the state of a policy created with 0 buckets;
+     * bucket_for_hash returns 0 for every hash afterwards.
+     */
+    void clear() noexcept {
+        m_mask = 0;
+    }
+private:
+    /**
+     * Smallest power of two >= value (1 when value is 0).
+     */
+    static std::size_t round_up_to_power_of_two(std::size_t value) {
+        if(value == 0) {
+            return 1;
+        }
+        if(is_power_of_two(value)) {
+            return value;
+        }
+        // Smear the highest set bit of (value - 1) into every lower position, then add one.
+        std::size_t smeared = value - 1;
+        const std::size_t nb_bits = sizeof(std::size_t) * CHAR_BIT;
+        for(std::size_t shift = 1; shift < nb_bits; shift *= 2) {
+            smeared |= smeared >> shift;
+        }
+        return smeared + 1;
+    }
+    static constexpr bool is_power_of_two(std::size_t value) {
+        return !(value == 0 || (value & (value - 1)) != 0);
+    }
+protected:
+    static_assert(is_power_of_two(GrowthFactor) && GrowthFactor >= 2, "GrowthFactor must be a power of two >= 2.");
+    // bucket_count - 1, or 0 when there are no buckets.
+    std::size_t m_mask;
+};
+/**
+ * Growth policy whose bucket count grows by the ratio GrowthFactor::num / GrowthFactor::den and
+ * which maps a hash to a bucket with a modulo. The modulo is slower than a mask, but the policy
+ * is useful when a gentler growth is wanted.
+ */
+template<class GrowthFactor = std::ratio<3, 2>>
+class mod_growth_policy {
+public:
+    /**
+     * Use min_bucket_count_in_out as the modulo (the value is left untouched). A request above
+     * max_bucket_count() is reported as std::length_error through TSL_RH_THROW_OR_TERMINATE.
+     */
+    explicit mod_growth_policy(std::size_t& min_bucket_count_in_out) {
+        if(min_bucket_count_in_out > max_bucket_count()) {
+            TSL_RH_THROW_OR_TERMINATE(std::length_error, "The hash table exceeds its maxmimum size.");
+        }
+        // With 0 buckets the modulo is 1 so that bucket_for_hash always yields 0.
+        m_mod = (min_bucket_count_in_out > 0) ? min_bucket_count_in_out : 1;
+    }
+    std::size_t bucket_for_hash(std::size_t hash) const noexcept {
+        return hash % m_mod;
+    }
+    /**
+     * Current bucket count multiplied by the growth ratio, rounded up and capped at
+     * max_bucket_count(). Reports std::length_error if the cap has already been reached
+     * or if the computed value is not a normal floating point number.
+     */
+    std::size_t next_bucket_count() const {
+        if(m_mod == max_bucket_count()) {
+            TSL_RH_THROW_OR_TERMINATE(std::length_error, "The hash table exceeds its maxmimum size.");
+        }
+        const double grown_count = std::ceil(double(m_mod) * REHASH_SIZE_MULTIPLICATION_FACTOR);
+        if(!std::isnormal(grown_count)) {
+            TSL_RH_THROW_OR_TERMINATE(std::length_error, "The hash table exceeds its maxmimum size.");
+        }
+        return (grown_count > double(max_bucket_count())) ? max_bucket_count()
+                                                          : std::size_t(grown_count);
+    }
+    std::size_t max_bucket_count() const {
+        return MAX_BUCKET_COUNT;
+    }
+    /**
+     * Back to the state of a policy created with 0 buckets.
+     */
+    void clear() noexcept {
+        m_mod = 1;
+    }
+private:
+    static constexpr double REHASH_SIZE_MULTIPLICATION_FACTOR = 1.0 * GrowthFactor::num / GrowthFactor::den;
+    static const std::size_t MAX_BUCKET_COUNT = 
+            std::size_t(double(
+                    std::numeric_limits<std::size_t>::max() / REHASH_SIZE_MULTIPLICATION_FACTOR
+            ));
+    static_assert(REHASH_SIZE_MULTIPLICATION_FACTOR >= 1.1, "Growth factor should be >= 1.1.");
+    // Current modulo; equals the bucket count, or 1 when there are no buckets.
+    std::size_t m_mod;
+};
+namespace detail {
+// Bucket counts available to prime_growth_policy, in increasing order.
+static constexpr const std::array<std::size_t, 40> PRIMES = {{
+    1ul, 5ul, 17ul, 29ul, 37ul, 53ul, 67ul, 79ul,
+    97ul, 131ul, 193ul, 257ul, 389ul, 521ul, 769ul, 1031ul,
+    1543ul, 2053ul, 3079ul, 6151ul, 12289ul, 24593ul, 49157ul, 98317ul,
+    196613ul, 393241ul, 786433ul, 1572869ul, 3145739ul, 6291469ul, 12582917ul, 25165843ul,
+    50331653ul, 100663319ul, 201326611ul, 402653189ul, 805306457ul, 1610612741ul, 3221225473ul, 4294967291ul
+}};
+// hash modulo the IPrime-th entry of PRIMES; the divisor is a compile-time constant.
+template<unsigned int IPrime>
+static constexpr std::size_t mod(std::size_t hash) {
+    return hash % PRIMES[IPrime];
+}
+// MOD_PRIME[iprime](hash) returns hash % PRIMES[iprime]. Going through one function per prime lets
+// the compiler optimize each modulo for its constant divisor instead of emitting a generic division.
+static constexpr const std::array<std::size_t(*)(std::size_t), 40> MOD_PRIME = {{
+    &mod<0>,  &mod<1>,  &mod<2>,  &mod<3>,  &mod<4>,  &mod<5>,  &mod<6>,  &mod<7>,
+    &mod<8>,  &mod<9>,  &mod<10>, &mod<11>, &mod<12>, &mod<13>, &mod<14>, &mod<15>,
+    &mod<16>, &mod<17>, &mod<18>, &mod<19>, &mod<20>, &mod<21>, &mod<22>, &mod<23>,
+    &mod<24>, &mod<25>, &mod<26>, &mod<27>, &mod<28>, &mod<29>, &mod<30>, &mod<31>,
+    &mod<32>, &mod<33>, &mod<34>, &mod<35>, &mod<36>, &mod<37>, &mod<38>, &mod<39>
+}};
+}
+/**
+ * Growth policy that uses prime numbers as bucket counts. Generally slower than
+ * tsl::rh::power_of_two_growth_policy, but likely to spread the values better across the
+ * buckets when the hash function is poor.
+ * 
+ * So that the compiler can optimize the modulo, the divisor is never a runtime value: a lookup
+ * table of functions, one per prime, is indexed by the current prime.
+ * 
+ * Written as a switch, the dispatch would look like:
+ * \code
+ * switch(iprime) { // iprime is the index of the current prime of the hash table
+ *     case 0: hash % 1ul;
+ *             break;
+ *     case 1: hash % 5ul;
+ *             break;
+ *     case 2: hash % 17ul;
+ *             break;
+ *     ...
+ * }    
+ * \endcode
+ * 
+ * Because each divisor is a constant, the compiler can replace the modulo by a series of
+ * multiplications, subtractions and shifts. 
+ * 
+ * For instance 'hash % 5' could become something like 'hash - ((hash * 0xCCCCCCCD) >> 34) * 5'
+ * in a 64 bits environment.
+ */
+class prime_growth_policy {
+public:
+    /**
+     * Select the smallest prime of detail::PRIMES that is >= min_bucket_count_in_out and, when the
+     * request is not 0, write that prime back into min_bucket_count_in_out. A request of 0 stays 0.
+     * A request larger than every prime is reported as std::length_error through
+     * TSL_RH_THROW_OR_TERMINATE.
+     */
+    explicit prime_growth_policy(std::size_t& min_bucket_count_in_out) {
+        const auto first_prime = detail::PRIMES.begin();
+        const auto end_prime = detail::PRIMES.end();
+        const auto chosen_prime = std::lower_bound(first_prime, end_prime, min_bucket_count_in_out);
+        if(chosen_prime == end_prime) {
+            TSL_RH_THROW_OR_TERMINATE(std::length_error, "The hash table exceeds its maxmimum size.");
+        }
+        m_iprime = static_cast<unsigned int>(std::distance(first_prime, chosen_prime));
+        if(min_bucket_count_in_out > 0) {
+            min_bucket_count_in_out = *chosen_prime;
+        }
+    }
+    std::size_t bucket_for_hash(std::size_t hash) const noexcept {
+        return detail::MOD_PRIME[m_iprime](hash);
+    }
+    /**
+     * Next prime of the table; reports std::length_error when the last prime is already in use.
+     */
+    std::size_t next_bucket_count() const {
+        const auto next_iprime = m_iprime + 1;
+        if(next_iprime >= detail::PRIMES.size()) {
+            TSL_RH_THROW_OR_TERMINATE(std::length_error, "The hash table exceeds its maxmimum size.");
+        }
+        return detail::PRIMES[next_iprime];
+    }   
+    std::size_t max_bucket_count() const {
+        return detail::PRIMES.back();
+    }
+    /**
+     * Select the first prime of the table (1), so that bucket_for_hash returns 0 for every hash.
+     */
+    void clear() noexcept {
+        m_iprime = 0;
+    }
+private:
+    // Index into detail::PRIMES / detail::MOD_PRIME of the prime currently in use.
+    unsigned int m_iprime;
+    static_assert(std::numeric_limits<decltype(m_iprime)>::max() >= detail::PRIMES.size(), 
+                  "The type of m_iprime is not big enough.");
+}; 
+}
+}
+#endif
--- a/include/tsl/robin_hash.h
+++ b/include/tsl/robin_hash.h
+/**
+ * MIT License
+ * 
+ * Copyright (c) 2017 Tessil
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ * 
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef TSL_ROBIN_HASH_H
+#define TSL_ROBIN_HASH_H 
+#include <algorithm>
+#include <cassert>
+#include <cmath>
+#include <cstddef>
+#include <cstdint>
+#include <exception>
+#include <iterator>
+#include <limits>
+#include <memory>
+#include <stdexcept>
+#include <tuple>
+#include <type_traits>
+#include <utility>
+#include <vector>
+#include "robin_growth_policy.h"
+namespace tsl {
+namespace detail_robin_hash {
+/**
+ * Maps any type to `void`. Used below to detect the presence of a nested type through SFINAE.
+ */
+template<typename T>
+struct make_void {
+    typedef void type;
+};
+/**
+ * Trait that is true when `T` declares a nested `is_transparent` type, false otherwise.
+ */
+template<typename T, typename Enable = void>
+struct has_is_transparent: std::integral_constant<bool, false> {
+};
+template<typename T>
+struct has_is_transparent<T, typename make_void<typename T::is_transparent>::type>: std::integral_constant<bool, true> {
+};
+/**
+ * Trait that is true only for instantiations of tsl::rh::power_of_two_growth_policy.
+ */
+template<typename Policy>
+struct is_power_of_two_policy: std::integral_constant<bool, false> {
+};
+template<std::size_t Factor>
+struct is_power_of_two_policy<tsl::rh::power_of_two_growth_policy<Factor>>: std::integral_constant<bool, true> {
+};
+// std::clamp only exists since C++17 and this header has to build as C++11.
+// Returns a reference to one of the three arguments: `v` limited to the range [lo, hi].
+template<class T>
+const T& clamp( const T& v, const T& lo, const T& hi) {
+    const T& floored = (lo < v) ? v : lo;
+    return (floored < hi) ? floored : hi;
+}
+// Type of the (possibly truncated) hash kept in a bucket.
+using truncated_hash_type = std::uint_least32_t;
+/**
+ * Base class of a bucket entry. The primary template (StoreHash == false) keeps nothing:
+ * every hash compares equal, the reported hash is 0 and set_hash is a no-op.
+ */
+template<bool StoreHash>
+class bucket_entry_hash {
+public:
+    bool bucket_hash_equal(std::size_t /*hash*/) const noexcept {
+        return true;
+    }
+    truncated_hash_type truncated_hash() const noexcept {
+        return 0;
+    }
+protected:
+    void set_hash(truncated_hash_type /*hash*/) noexcept {
+    }
+};
+/**
+ * Specialization that really stores the truncated hash.
+ */
+template<>
+class bucket_entry_hash<true> {
+public:
+    // Compare the stored hash with `hash` after truncating `hash` to truncated_hash_type.
+    bool bucket_hash_equal(std::size_t hash) const noexcept {
+        const truncated_hash_type truncated = static_cast<truncated_hash_type>(hash);
+        return truncated == m_hash;
+    }
+    truncated_hash_type truncated_hash() const noexcept {
+        return m_hash;
+    }
+protected:
+    void set_hash(truncated_hash_type hash) noexcept {
+        m_hash = hash;
+    }
+private:    
+    truncated_hash_type m_hash;
+};
+/**
+ * Each bucket entry has:
+ * - A value of type `ValueType`, kept in raw aligned storage and only constructed while the
+ *   bucket is not empty.
+ * - An integer to store how far the value of the bucket, if any, is from its ideal bucket 
+ *   (ex: if the current bucket 5 has the value 'foo' and `hash('foo') % nb_buckets` == 3,
+ *        `dist_from_ideal_bucket()` will return 2 as the current value of the bucket is two
+ *        buckets away from its ideal bucket)
+ *   If there is no value in the bucket (i.e. `empty()` is true) `dist_from_ideal_bucket()` will be < 0.
+ * - A marker which tells us if the bucket is the last bucket of the bucket array (useful for the 
+ *   iterator of the hash table).
+ * - If `StoreHash` is true, 32 bits of the hash of the value, if any, are also stored in the bucket. 
+ *   If the size of the hash is more than 32 bits, it is truncated. We don't store the full hash
+ *   as storing the hash is a potential opportunity to use the unused space due to the alignment
+ *   of the bucket_entry structure. We can thus potentially store the hash without any extra space 
+ *   (which would not be possible with 64 bits of the hash).
+ */
+template<typename ValueType, bool StoreHash>
+class bucket_entry: public bucket_entry_hash<StoreHash> {
+    using bucket_hash = bucket_entry_hash<StoreHash>;
+public:
+    using value_type = ValueType;
+    using distance_type = std::int_least16_t;
+    /**
+     * Create an empty bucket which is not marked as the last bucket.
+     */
+    bucket_entry() noexcept: bucket_hash(), m_dist_from_ideal_bucket(EMPTY_MARKER_DIST_FROM_IDEAL_BUCKET),
+                             m_last_bucket(false)
+    {
+        tsl_rh_assert(empty());
+    }
+    /**
+     * Create an empty bucket whose last-bucket marker is `last_bucket`.
+     */
+    bucket_entry(bool last_bucket) noexcept: bucket_hash(), m_dist_from_ideal_bucket(EMPTY_MARKER_DIST_FROM_IDEAL_BUCKET),
+                                             m_last_bucket(last_bucket)
+    {
+        tsl_rh_assert(empty());
+    }
+    /**
+     * Copy the hash base and the last-bucket marker of `other`; the value is copy-constructed
+     * in place only if `other` holds one.
+     */
+    bucket_entry(const bucket_entry& other) noexcept(std::is_nothrow_copy_constructible<value_type>::value): 
+            bucket_hash(other),
+            m_dist_from_ideal_bucket(EMPTY_MARKER_DIST_FROM_IDEAL_BUCKET), 
+            m_last_bucket(other.m_last_bucket)
+    {
+        if(!other.empty()) {
+            ::new (static_cast<void*>(std::addressof(m_value))) value_type(other.value());
+            // The distance is set after the construction so that the bucket stays empty
+            // if the copy constructor of value_type throws.
+            m_dist_from_ideal_bucket = other.m_dist_from_ideal_bucket;
+        }
+    }
+    /**
+     * Never really used, but still necessary as we must call resize on an empty `std::vector<bucket_entry>`.
+     * and we need to support move-only types. See robin_hash constructor for details.
+     *
+     * The value is move-constructed from the one of `other`; `other` is not marked as empty
+     * and still owns its moved-from value.
+     */
+    bucket_entry(bucket_entry&& other) noexcept(std::is_nothrow_move_constructible<value_type>::value): 
+            bucket_hash(std::move(other)),
+            m_dist_from_ideal_bucket(EMPTY_MARKER_DIST_FROM_IDEAL_BUCKET), 
+            m_last_bucket(other.m_last_bucket) 
+    {
+        if(!other.empty()) {
+            ::new (static_cast<void*>(std::addressof(m_value))) value_type(std::move(other.value()));
+            m_dist_from_ideal_bucket = other.m_dist_from_ideal_bucket;
+        }
+    }
+    /**
+     * Destroy the current value, if any, then copy the hash base, value, distance and
+     * last-bucket marker of `other`. Self-assignment is a no-op.
+     */
+    bucket_entry& operator=(const bucket_entry& other) 
+            noexcept(std::is_nothrow_copy_constructible<value_type>::value) 
+    {
+        if(this != &other) {
+            // clear() leaves the bucket marked as empty until the distance is copied below.
+            clear();
+            bucket_hash::operator=(other);
+            if(!other.empty()) {
+                ::new (static_cast<void*>(std::addressof(m_value))) value_type(other.value());
+            }
+            m_dist_from_ideal_bucket = other.m_dist_from_ideal_bucket;
+            m_last_bucket = other.m_last_bucket;
+        }
+        return *this;
+    }
+    // Move assignment is explicitly disabled.
+    bucket_entry& operator=(bucket_entry&& ) = delete;
+    /**
+     * Destroy the stored value, if any.
+     */
+    ~bucket_entry() noexcept {
+        clear();
+    }
+    /**
+     * Destroy the stored value, if any, and mark the bucket as empty.
+     * The last-bucket marker and the stored hash are left untouched.
+     */
+    void clear() noexcept {
+        if(!empty()) {
+            destroy_value();
+            m_dist_from_ideal_bucket = EMPTY_MARKER_DIST_FROM_IDEAL_BUCKET;
+        }
+    }
+    /**
+     * True if the bucket holds no value, i.e. its distance is the empty marker.
+     */
+    bool empty() const noexcept {
+        return m_dist_from_ideal_bucket == EMPTY_MARKER_DIST_FROM_IDEAL_BUCKET;
+    }
+    /**
+     * Reference to the stored value. The bucket must not be empty.
+     */
+    value_type& value() noexcept {
+        tsl_rh_assert(!empty());
+        return *reinterpret_cast<value_type*>(std::addressof(m_value));
+    }
+    const value_type& value() const noexcept {
+        tsl_rh_assert(!empty());
+        return *reinterpret_cast<const value_type*>(std::addressof(m_value));
+    }
+    /**
+     * Stored distance from the ideal bucket; equal to the empty marker (-1) for an empty bucket.
+     */
+    distance_type dist_from_ideal_bucket() const noexcept {
+        return m_dist_from_ideal_bucket;
+    }
+    bool last_bucket() const noexcept {
+        return m_last_bucket;
+    }
+    void set_as_last_bucket() noexcept {
+        m_last_bucket = true;
+    }
+    /**
+     * Construct a value in place from `value_type_args` in a bucket that must currently be empty,
+     * then record its truncated hash and its distance from its ideal bucket (which must be >= 0).
+     */
+    template<typename... Args>
+    void set_value_of_empty_bucket(distance_type dist_from_ideal_bucket, 
+                                   truncated_hash_type hash, Args&&... value_type_args) 
+    {
+        tsl_rh_assert(dist_from_ideal_bucket >= 0);
+        tsl_rh_assert(empty());
+        ::new (static_cast<void*>(std::addressof(m_value))) value_type(std::forward<Args>(value_type_args)...);
+        this->set_hash(hash);
+        // Set last: the bucket only becomes non-empty once the value has been constructed.
+        m_dist_from_ideal_bucket = dist_from_ideal_bucket;
+        tsl_rh_assert(!empty());
+    }
+    /**
+     * Exchange the value and the distance of this non-empty bucket with the ones passed in
+     * parameter. The hash is exchanged too, but only when `StoreHash` is true.
+     */
+    void swap_with_value_in_bucket(distance_type& dist_from_ideal_bucket, 
+                                   truncated_hash_type& hash, value_type& value) 
+    {
+        tsl_rh_assert(!empty());
+        // Enable ADL so that a swap overload of value_type is picked up if there is one.
+        using std::swap;
+        swap(value, this->value());
+        swap(dist_from_ideal_bucket, m_dist_from_ideal_bucket);
+        // Avoid warning of unused variable if StoreHash is false
+        (void) hash;
+        if(StoreHash) {
+            const truncated_hash_type tmp_hash = this->truncated_hash();
+            this->set_hash(hash);
+            hash = tmp_hash;
+        }
+    }
+    /**
+     * Convert a full hash to the type stored in the bucket.
+     */
+    static truncated_hash_type truncate_hash(std::size_t hash) noexcept {
+        return truncated_hash_type(hash);
+    }
+private:
+    /**
+     * Run the destructor of the stored value; does not update the empty marker.
+     */
+    void destroy_value() noexcept {
+        tsl_rh_assert(!empty());
+        value().~value_type();
+    }
+private:
+    // Raw storage with the size and alignment of value_type; the value is created with placement new.
+    using storage = typename std::aligned_storage<sizeof(value_type), alignof(value_type)>::type;
+    // Distance stored in a bucket which holds no value.
+    static const distance_type EMPTY_MARKER_DIST_FROM_IDEAL_BUCKET = -1;
+    distance_type m_dist_from_ideal_bucket;
+    bool m_last_bucket;
+    storage m_value;
+};
+/**
+ * Internal common class used by `robin_map` and `robin_set`. 
+ * 
+ * ValueType is what will be stored by `robin_hash` (usually `std::pair<Key, T>` for map and `Key` for set).
+ * 
+ * `KeySelect` should be a `FunctionObject` which takes a `ValueType` in parameter and returns a 
+ *  reference to the key.
+ * 
+ * `ValueSelect` should be a `FunctionObject` which takes a `ValueType` in parameter and returns a 
+ *  reference to the value. `ValueSelect` should be void if there is no value (in a set for example).
+ * 
+ * The strong exception guarantee only holds if the expression 
+ * `std::is_nothrow_swappable<ValueType>::value && std::is_nothrow_move_constructible<ValueType>::value` is true.
+ * 
+ * Behaviour is undefined if the destructor of `ValueType` throws.
+ */
+template<class ValueType,
+         class KeySelect,
+         class ValueSelect,
+         class Hash,
+         class KeyEqual,
+         class Allocator,
+         bool StoreHash,
+         class GrowthPolicy>
+class robin_hash: private Hash, private KeyEqual, private GrowthPolicy {
+private:    
+    template<typename U>
+    using has_mapped_type = typename std::integral_constant<bool, !std::is_same<U, void>::value>;
+    static_assert(noexcept(std::declval<GrowthPolicy>().bucket_for_hash(std::size_t(0))), "GrowthPolicy::bucket_for_hash must be noexcept.");
+    static_assert(noexcept(std::declval<GrowthPolicy>().clear()), "GrowthPolicy::clear must be noexcept.");
+public:
+    template<bool IsConst>
+    class robin_iterator;
+    using key_type = typename KeySelect::key_type;
+    using value_type = ValueType;
+    using size_type = std::size_t;
+    using difference_type = std::ptrdiff_t;
+    using hasher = Hash;
+    using key_equal = KeyEqual;
+    using allocator_type = Allocator;
+    using reference = value_type&;
+    using const_reference = const value_type&;
+    using pointer = value_type*;
+    using const_pointer = const value_type*;
+    using iterator = robin_iterator<false>;
+    using const_iterator = robin_iterator<true>;
+private:
+    /**
+     * Either store the hash because we are asked by the `StoreHash` template parameter
+     * or store the hash because it doesn't cost us anything in size and can be used to speed up rehash.
+     */
+    static constexpr bool STORE_HASH = StoreHash || 
+                                       (
+                                         (sizeof(tsl::detail_robin_hash::bucket_entry<value_type, true>) ==
+                                          sizeof(tsl::detail_robin_hash::bucket_entry<value_type, false>))
+                                         &&
+                                         (sizeof(std::size_t) == sizeof(truncated_hash_type) ||
+                                          is_power_of_two_policy<GrowthPolicy>::value)
+                                         &&
+                                          // Don't store the hash for primitive types with default hash.
+                                          (!std::is_arithmetic<key_type>::value ||
+                                           !std::is_same<Hash, std::hash<key_type>>::value)
+                                       );
+    /**
+     * Only use the stored hash on lookup if we are explicitly asked. We are not sure how slow
+     * the KeyEqual operation is. An extra comparison may slow things down with a fast KeyEqual.
+     */
+    static constexpr bool USE_STORED_HASH_ON_LOOKUP = StoreHash;
+    /**
+     * Tell whether the hash stored in the buckets may be reused on rehash for a table of
+     * `bucket_count` buckets, instead of hashing the keys again.
+     *
+     * This requires a stored hash (STORE_HASH) and either a stored hash as wide as std::size_t,
+     * or a power of two policy: that policy only masks the least significant bits of the hash, so
+     * it is enough to check that the mask (bucket_count - 1) fits in truncated_hash_type, i.e. that
+     * the truncation did not drop bits the mask needs.
+     */
+    static bool USE_STORED_HASH_ON_REHASH(size_type bucket_count) {
+        // Only read on the power of two path (and by the assertion in debug builds).
+        (void) bucket_count;
+        if(!STORE_HASH) {
+            return false;
+        }
+        if(sizeof(std::size_t) == sizeof(truncated_hash_type)) {
+            return true;
+        }
+        if(is_power_of_two_policy<GrowthPolicy>::value) {
+            tsl_rh_assert(bucket_count > 0);
+            const size_type mask = bucket_count - 1;
+            return mask <= std::numeric_limits<truncated_hash_type>::max();
+        }
+        return false;
+    }
+    using bucket_entry = tsl::detail_robin_hash::bucket_entry<value_type, STORE_HASH>;
+    using distance_type = typename bucket_entry::distance_type;
+    using buckets_allocator = typename std::allocator_traits<allocator_type>::template rebind_alloc<bucket_entry>;
+    using buckets_container_type = std::vector<bucket_entry, buckets_allocator>;
+public: 
+    /**
+     * Forward iterator over the non-empty buckets of the table.
+     *
+     * The 'operator*()' and 'operator->()' methods return a const reference and const pointer respectively to the 
+     * stored value type.
+     * 
+     * In case of a map, to get a mutable reference to the value associated to a key (the '.second' in the 
+     * stored pair), you have to call 'value()'. 
+     * 
+     * The main reason for this is that if we returned a `std::pair<Key, T>&` instead 
+     * of a `const std::pair<Key, T>&`, the user may modify the key which will put the map in a undefined state.
+     */
+    template<bool IsConst>
+    class robin_iterator {
+        friend class robin_hash;
+    private:
+        using bucket_entry_ptr = typename std::conditional<IsConst, 
+                                                           const bucket_entry*, 
+                                                           bucket_entry*>::type;
+        // Only robin_hash may build an iterator pointing to a given bucket.
+        robin_iterator(bucket_entry_ptr bucket) noexcept: m_bucket(bucket) {
+        }
+    public:
+        using iterator_category = std::forward_iterator_tag;
+        using value_type = const typename robin_hash::value_type;
+        using difference_type = std::ptrdiff_t;
+        using reference = value_type&;
+        using pointer = value_type*;
+        /**
+         * Create an iterator which points to no bucket. The pointer is set to null so that
+         * default-constructed iterators hold a determinate value and compare equal to each
+         * other, instead of comparing indeterminate pointers.
+         */
+        robin_iterator() noexcept: m_bucket(nullptr) {
+        }
+        // Copy constructor from iterator to const_iterator.
+        template<bool TIsConst = IsConst, typename std::enable_if<TIsConst>::type* = nullptr>
+        robin_iterator(const robin_iterator<!TIsConst>& other) noexcept: m_bucket(other.m_bucket) {
+        }
+        robin_iterator(const robin_iterator& other) = default;
+        robin_iterator(robin_iterator&& other) = default;
+        robin_iterator& operator=(const robin_iterator& other) = default;
+        robin_iterator& operator=(robin_iterator&& other) = default;
+        /**
+         * Key of the value in the pointed bucket, extracted with KeySelect.
+         */
+        const typename robin_hash::key_type& key() const {
+            return KeySelect()(m_bucket->value());
+        }
+        /**
+         * Mapped value of the pointed bucket, extracted with ValueSelect. Only available when
+         * ValueSelect is not void; the reference is const for a const iterator and mutable otherwise.
+         */
+        template<class U = ValueSelect, typename std::enable_if<has_mapped_type<U>::value && IsConst>::type* = nullptr>
+        const typename U::value_type& value() const {
+            return U()(m_bucket->value());
+        }
+        template<class U = ValueSelect, typename std::enable_if<has_mapped_type<U>::value && !IsConst>::type* = nullptr>
+        typename U::value_type& value() {
+            return U()(m_bucket->value());
+        }
+        reference operator*() const {
+            return m_bucket->value();
+        }
+        pointer operator->() const {
+            return std::addressof(m_bucket->value());
+        }
+        /**
+         * Move to the next non-empty bucket. When the bucket marked as last is reached, the
+         * iterator stops one position after it.
+         */
+        robin_iterator& operator++() {
+            while(true) {
+                // The last-bucket marker is checked before advancing so that the bucket
+                // following the last one is never inspected.
+                if(m_bucket->last_bucket()) {
+                    ++m_bucket;
+                    return *this;
+                }
+                ++m_bucket;
+                if(!m_bucket->empty()) {
+                    return *this;
+                }
+            }
+        }
+        robin_iterator operator++(int) {
+            robin_iterator tmp(*this);
+            ++*this;
+            return tmp;
+        }
+        // Two iterators are equal when they point to the same bucket.
+        friend bool operator==(const robin_iterator& lhs, const robin_iterator& rhs) { 
+            return lhs.m_bucket == rhs.m_bucket; 
+        }
+        friend bool operator!=(const robin_iterator& lhs, const robin_iterator& rhs) { 
+            return !(lhs == rhs); 
+        }
+    private:
+        bucket_entry_ptr m_bucket;
+    };
+public:
+#if defined(__cplusplus) && __cplusplus >= 201402L
+    /**
+     * Variant of the constructor compiled when __cplusplus >= 201402L: the bucket array is
+     * directly created with `bucket_count` default-inserted entries and the allocator `alloc`.
+     *
+     * `bucket_count` is first handed to the GrowthPolicy constructor; the policies of
+     * robin_growth_policy.h take it by reference and may raise it, so the value used afterwards
+     * is the adjusted one. A count above max_bucket_count() is reported as std::length_error
+     * through TSL_RH_THROW_OR_TERMINATE.
+     */
+    robin_hash(size_type bucket_count, 
+               const Hash& hash,
+               const KeyEqual& equal,
+               const Allocator& alloc,
+               float min_load_factor = DEFAULT_MIN_LOAD_FACTOR,
+               float max_load_factor = DEFAULT_MAX_LOAD_FACTOR): 
+                                       Hash(hash), 
+                                       KeyEqual(equal),
+                                       GrowthPolicy(bucket_count),
+                                       // The lambda runs the size check before the vector is allocated.
+                                       m_buckets_data(
+                                           [&]() {
+                                               if(bucket_count > max_bucket_count()) {
+                                                   TSL_RH_THROW_OR_TERMINATE(std::length_error, 
+                                                                             "The map exceeds its maximum bucket count.");
+                                               }
+                                               return bucket_count;
+                                           }(), alloc
+                                       ),
+                                       // With no bucket, m_buckets points to the shared static empty bucket.
+                                       m_buckets(m_buckets_data.empty()?static_empty_bucket_ptr():m_buckets_data.data()),
+                                       m_bucket_count(bucket_count),
+                                       m_nb_elements(0), 
+                                       m_grow_on_next_insert(false),
+                                       m_try_skrink_on_next_insert(false)
+    {
+        if(m_bucket_count > 0) {
+            tsl_rh_assert(!m_buckets_data.empty());
+            // The iterator relies on this marker to detect the end of the bucket array.
+            m_buckets_data.back().set_as_last_bucket();
+        }
+        // The load factors are applied through their setters, defined outside this excerpt.
+        this->min_load_factor(min_load_factor);
+        this->max_load_factor(max_load_factor);
+    }
+#else
+    /**
+     * C++11 doesn't support the creation of a std::vector with a custom allocator and 'count' default-inserted elements. 
+     * The needed constructor `explicit vector(size_type count, const Allocator& alloc = Allocator());` is only
+     * available in C++14 and later. We thus must resize after using the `vector(const Allocator& alloc)` constructor.
+     * 
+     * We can't use `vector(size_type count, const T& value, const Allocator& alloc)` as it requires the
+     * value T to be copyable.
+     *
+     * `bucket_count` is first handed to the GrowthPolicy constructor; the policies of
+     * robin_growth_policy.h take it by reference and may raise it, so the value used afterwards
+     * is the adjusted one. A count above max_bucket_count() is reported as std::length_error
+     * through TSL_RH_THROW_OR_TERMINATE.
+     */
+    robin_hash(size_type bucket_count, 
+               const Hash& hash,
+               const KeyEqual& equal,
+               const Allocator& alloc,
+               float min_load_factor = DEFAULT_MIN_LOAD_FACTOR,
+               float max_load_factor = DEFAULT_MAX_LOAD_FACTOR): 
+                                       Hash(hash), 
+                                       KeyEqual(equal),
+                                       GrowthPolicy(bucket_count),
+                                       m_buckets_data(alloc), 
+                                       // Points to the shared static empty bucket until buckets are allocated below.
+                                       m_buckets(static_empty_bucket_ptr()), 
+                                       m_bucket_count(bucket_count),
+                                       m_nb_elements(0), 
+                                       m_grow_on_next_insert(false),
+                                       m_try_skrink_on_next_insert(false)
+    {
+        if(bucket_count > max_bucket_count()) {
+            TSL_RH_THROW_OR_TERMINATE(std::length_error, "The map exceeds its maxmimum bucket count.");
+        }
+        if(m_bucket_count > 0) {
+            m_buckets_data.resize(m_bucket_count);
+            m_buckets = m_buckets_data.data();
+            tsl_rh_assert(!m_buckets_data.empty());
+            // The iterator relies on this marker to detect the end of the bucket array.
+            m_buckets_data.back().set_as_last_bucket();
+        }
+        // The load factors are applied through their setters, defined outside this excerpt.
+        this->min_load_factor(min_load_factor);
+        this->max_load_factor(max_load_factor);
+    }
+#endif
+    robin_hash(const robin_hash& other): Hash(other), // Copy constructor: duplicates the functors, growth policy, buckets and counters of `other`.
+                                         KeyEqual(other),
+                                         GrowthPolicy(other),
+                                         m_buckets_data(other.m_buckets_data),
+                                         m_buckets(m_buckets_data.empty()?static_empty_bucket_ptr():m_buckets_data.data()), // points at this object's own copy, or at the static empty bucket
+                                         m_bucket_count(other.m_bucket_count),
+                                         m_nb_elements(other.m_nb_elements),
+                                         m_load_threshold(other.m_load_threshold),
+                                         m_max_load_factor(other.m_max_load_factor),
+                                         m_grow_on_next_insert(other.m_grow_on_next_insert),
+                                         m_min_load_factor(other.m_min_load_factor),
+                                         m_try_skrink_on_next_insert(other.m_try_skrink_on_next_insert)
+    {
+    }
+    robin_hash(robin_hash&& other) noexcept(std::is_nothrow_move_constructible<Hash>::value && // Move constructor: noexcept only if every moved part is nothrow-movable.
+                                            std::is_nothrow_move_constructible<KeyEqual>::value &&
+                                            std::is_nothrow_move_constructible<GrowthPolicy>::value &&
+                                            std::is_nothrow_move_constructible<buckets_container_type>::value)
+                                          : Hash(std::move(static_cast<Hash&>(other))),
+                                            KeyEqual(std::move(static_cast<KeyEqual&>(other))),
+                                            GrowthPolicy(std::move(static_cast<GrowthPolicy&>(other))),
+                                            m_buckets_data(std::move(other.m_buckets_data)),
+                                            m_buckets(m_buckets_data.empty()?static_empty_bucket_ptr():m_buckets_data.data()), // evaluated on the already moved-in container
+                                            m_bucket_count(other.m_bucket_count),
+                                            m_nb_elements(other.m_nb_elements),
+                                            m_load_threshold(other.m_load_threshold),
+                                            m_max_load_factor(other.m_max_load_factor),
+                                            m_grow_on_next_insert(other.m_grow_on_next_insert),
+                                            m_min_load_factor(other.m_min_load_factor),
+                                            m_try_skrink_on_next_insert(other.m_try_skrink_on_next_insert)
+    {
+        other.GrowthPolicy::clear(); // from here on: reset `other` to a zero-bucket, zero-element table
+        other.m_buckets_data.clear();
+        other.m_buckets = static_empty_bucket_ptr(); // must not keep pointing into storage now owned by *this
+        other.m_bucket_count = 0;
+        other.m_nb_elements = 0;
+        other.m_load_threshold = 0;
+        other.m_grow_on_next_insert = false;
+        other.m_try_skrink_on_next_insert = false;
+    }
+    /**
+     * Copy assignment. Assigning a table to itself does nothing; otherwise every base
+     * sub-object and member is copied from `other`, and m_buckets is re-pointed at this
+     * object's own bucket storage (or at the static empty bucket when there is none).
+     */
+    robin_hash& operator=(const robin_hash& other) {
+        if(this == &other) {
+            return *this;
+        }
+
+        Hash::operator=(other);
+        KeyEqual::operator=(other);
+        GrowthPolicy::operator=(other);
+
+        m_buckets_data = other.m_buckets_data;
+        if(m_buckets_data.empty()) {
+            m_buckets = static_empty_bucket_ptr();
+        }
+        else {
+            m_buckets = m_buckets_data.data();
+        }
+
+        m_bucket_count = other.m_bucket_count;
+        m_nb_elements = other.m_nb_elements;
+        m_load_threshold = other.m_load_threshold;
+        m_max_load_factor = other.m_max_load_factor;
+        m_grow_on_next_insert = other.m_grow_on_next_insert;
+        m_min_load_factor = other.m_min_load_factor;
+        m_try_skrink_on_next_insert = other.m_try_skrink_on_next_insert;
+
+        return *this;
+    }
+    /**
+     * Move assignment: exchanges the whole state with `other`, then clears `other`,
+     * which is left holding (emptied) the buckets that used to belong to this table.
+     */
+    robin_hash& operator=(robin_hash&& other) {
+        robin_hash& source = other;
+
+        source.swap(*this);
+        source.clear();
+
+        return *this;
+    }
+    allocator_type get_allocator() const { // Returns the allocator held by the bucket container.
+        return m_buckets_data.get_allocator();
+    }
+    /*
+     * Iterators
+     */
+    /**
+     * Iterator on the first non-empty bucket; equal to end() when no bucket holds a value.
+     */
+    iterator begin() noexcept {
+        bucket_entry* cursor = m_buckets;
+        bucket_entry* const past_last = m_buckets + m_bucket_count;
+
+        for(; cursor != past_last && cursor->empty(); ++cursor) {
+        }
+
+        return iterator(cursor);
+    }
+    const_iterator begin() const noexcept { // Const overload, same result as cbegin().
+        return cbegin();
+    }
+    /**
+     * Const iterator on the first non-empty bucket; equal to cend() when no bucket holds a value.
+     */
+    const_iterator cbegin() const noexcept {
+        bucket_entry* cursor = m_buckets;
+        bucket_entry* const past_last = m_buckets + m_bucket_count;
+
+        for(; cursor != past_last && cursor->empty(); ++cursor) {
+        }
+
+        return const_iterator(cursor);
+    }
+    iterator end() noexcept { // Past-the-end iterator: one position after the last bucket.
+        return iterator(m_buckets + m_bucket_count);
+    }
+    const_iterator end() const noexcept { // Const overload, same result as cend().
+        return cend();
+    }
+    const_iterator cend() const noexcept { // Past-the-end const iterator: one position after the last bucket.
+        return const_iterator(m_buckets + m_bucket_count);
+    }
+    /*
+     * Capacity
+     */
+    bool empty() const noexcept { // True when the table stores no element (the bucket count may still be non-zero).
+        return m_nb_elements == 0;
+    }
+    size_type size() const noexcept { // Number of stored elements, not the number of buckets.
+        return m_nb_elements;
+    }
+    size_type max_size() const noexcept { // Upper bound reported by the underlying bucket container.
+        return m_buckets_data.max_size();
+    }
+    /*
+     * Modifiers
+     */
+    /**
+     * Removes every element while keeping the bucket array at its current size.
+     * Also cancels a pending "grow on next insert" request.
+     */
+    void clear() noexcept {
+        for(auto it = m_buckets_data.begin(); it != m_buckets_data.end(); ++it) {
+            it->clear();
+        }
+
+        m_grow_on_next_insert = false;
+        m_nb_elements = 0;
+    }
+    template<typename P>
+    std::pair<iterator, bool> insert(P&& value) { // Extracts the key with KeySelect and forwards the value to insert_impl.
+        return insert_impl(KeySelect()(value), std::forward<P>(value));
+    }
+    /**
+     * Insert with a hint: when `hint` already designates an element whose key compares
+     * equal to the key of `value`, that element is returned and nothing is inserted.
+     * Otherwise this is a regular insert and the resulting iterator is returned.
+     */
+    template<typename P>
+    iterator insert_hint(const_iterator hint, P&& value) {
+        const bool hint_has_key = hint != cend() && 
+                                  compare_keys(KeySelect()(*hint), KeySelect()(value));
+        if(!hint_has_key) {
+            return insert(std::forward<P>(value)).first;
+        }
+
+        return mutable_iterator(hint);
+    }
+    /**
+     * Inserts every element of [first, last).
+     *
+     * When InputIt is at least a forward iterator the range length is measured first and
+     * room is reserved up front if the range does not fit under the current load threshold.
+     */
+    template<class InputIt>
+    void insert(InputIt first, InputIt last) {
+        using category = typename std::iterator_traits<InputIt>::iterator_category;
+
+        if(std::is_base_of<std::forward_iterator_tag, category>::value) {
+            tsl_rh_assert(m_load_threshold >= size());
+
+            const size_type room_left = m_load_threshold - size();
+            const auto incoming = std::distance(first, last);
+            if(incoming > 0 && room_left < size_type(incoming)) {
+                reserve(size() + size_type(incoming));
+            }
+        }
+
+        while(first != last) {
+            insert(*first);
+            ++first;
+        }
+    }
+    /**
+     * Inserts (key, obj) when the key is absent, otherwise assigns `obj` to the existing
+     * mapped value. The bool of the returned pair is true only when an insertion happened.
+     */
+    template<class K, class M>
+    std::pair<iterator, bool> insert_or_assign(K&& key, M&& obj) {
+        auto result = try_emplace(std::forward<K>(key), std::forward<M>(obj));
+
+        const bool inserted = result.second;
+        if(inserted) {
+            return result;
+        }
+
+        // Key already present: `obj` was not consumed by try_emplace, so it can still be forwarded.
+        result.first.value() = std::forward<M>(obj);
+        return result;
+    }
+    /**
+     * Hinted insert_or_assign: when `hint` designates an element with an equal key the
+     * mapped value is overwritten in place, otherwise the un-hinted overload is used.
+     */
+    template<class K, class M>
+    iterator insert_or_assign(const_iterator hint, K&& key, M&& obj) {
+        const bool hint_has_key = hint != cend() && compare_keys(KeySelect()(*hint), key);
+        if(!hint_has_key) {
+            return insert_or_assign(std::forward<K>(key), std::forward<M>(obj)).first;
+        }
+
+        auto target = mutable_iterator(hint);
+        target.value() = std::forward<M>(obj);
+        return target;
+    }
+    template<class... Args>
+    std::pair<iterator, bool> emplace(Args&&... args) { // Builds a value_type from args first, then inserts it; the value is constructed even if its key already exists.
+        return insert(value_type(std::forward<Args>(args)...));
+    }
+    template<class... Args>
+    iterator emplace_hint(const_iterator hint, Args&&... args) { // Builds a value_type from args and hands it to insert_hint.
+        return insert_hint(hint, value_type(std::forward<Args>(args)...));        
+    }
+    template<class K, class... Args>
+    std::pair<iterator, bool> try_emplace(K&& key, Args&&... args) { // Piecewise-constructs (key, args...) through insert_impl, which only consumes the arguments when it inserts.
+        return insert_impl(key, std::piecewise_construct, 
+                                std::forward_as_tuple(std::forward<K>(key)), 
+                                std::forward_as_tuple(std::forward<Args>(args)...));
+    }
+    /**
+     * Hinted try_emplace: returns the hinted element when its key compares equal to `key`,
+     * otherwise falls back to try_emplace and returns the iterator it produced.
+     */
+    template<class K, class... Args>
+    iterator try_emplace_hint(const_iterator hint, K&& key, Args&&... args) {
+        const bool hint_has_key = hint != cend() && compare_keys(KeySelect()(*hint), key);
+        if(!hint_has_key) {
+            return try_emplace(std::forward<K>(key), std::forward<Args>(args)...).first;
+        }
+
+        return mutable_iterator(hint);
+    }
+    /**
+     * Overload for a mutable `iterator`. Without it, passing an `iterator` (instead of a
+     * `const_iterator`) would select `template<class K> size_type erase(const K& key)`.
+     *
+     * Erases the element at `pos` and flags the table so that the next insert may shrink it.
+     */
+    iterator erase(iterator pos) {
+        erase_from_bucket(pos);
+        m_try_skrink_on_next_insert = true;
+
+        /**
+         * erase_from_bucket clears the bucket and then shifts the following values backward,
+         * so the bucket may already hold the next value. Only step forward when it stayed empty.
+         */
+        if(!pos.m_bucket->empty()) {
+            return pos;
+        }
+
+        ++pos;
+        return pos;
+    }
+    iterator erase(const_iterator pos) { // Const-iterator overload: converts to a mutable iterator and delegates.
+        return erase(mutable_iterator(pos));
+    }
+    iterator erase(const_iterator first, const_iterator last) { // Erases every element in [first, last) and returns an iterator on the bucket where iteration should resume.
+        if(first == last) {
+            return mutable_iterator(first);
+        }
+        auto first_mutable = mutable_iterator(first);
+        auto last_mutable = mutable_iterator(last);
+        for(auto it = first_mutable.m_bucket; it != last_mutable.m_bucket; ++it) { // pass 1: empty every occupied bucket of the range
+            if(!it->empty()) {
+                it->clear();
+                m_nb_elements--;
+            }
+        }
+        if(last_mutable == end()) { // nothing after the range, so there is nothing to shift back
+            return end();
+        }
+        /*
+         * Backward shift on the values which come after the deleted values.
+         * We try to move the values closer to their ideal bucket.
+         *
+         * icloser_bucket is the next free bucket a value may be moved into and
+         * ito_move_closer_value is the bucket of the value currently considered. A value
+         * is moved back by at most its own distance from its ideal bucket.
+         */
+        std::size_t icloser_bucket = static_cast<std::size_t>(first_mutable.m_bucket - m_buckets);
+        std::size_t ito_move_closer_value = static_cast<std::size_t>(last_mutable.m_bucket - m_buckets);
+        tsl_rh_assert(ito_move_closer_value > icloser_bucket);
+        const std::size_t ireturn_bucket = ito_move_closer_value - // bucket where the value at `last` ends up once shifted
+                                           std::min(ito_move_closer_value - icloser_bucket, 
+                                                    std::size_t(m_buckets[ito_move_closer_value].dist_from_ideal_bucket()));
+        while(ito_move_closer_value < m_bucket_count && m_buckets[ito_move_closer_value].dist_from_ideal_bucket() > 0) { // stops at the table end or at a bucket with distance <= 0
+            icloser_bucket = ito_move_closer_value - 
+                             std::min(ito_move_closer_value - icloser_bucket, 
+                                      std::size_t(m_buckets[ito_move_closer_value].dist_from_ideal_bucket()));
+            tsl_rh_assert(m_buckets[icloser_bucket].empty());
+            const distance_type new_distance = distance_type(m_buckets[ito_move_closer_value].dist_from_ideal_bucket() -
+                                                             (ito_move_closer_value - icloser_bucket));
+            m_buckets[icloser_bucket].set_value_of_empty_bucket(new_distance, 
+                                                                m_buckets[ito_move_closer_value].truncated_hash(), 
+                                                                std::move(m_buckets[ito_move_closer_value].value()));
+            m_buckets[ito_move_closer_value].clear();
+            ++icloser_bucket;
+            ++ito_move_closer_value;
+        }
+        m_try_skrink_on_next_insert = true; // elements were removed: let the next insert consider shrinking
+        return iterator(m_buckets + ireturn_bucket);
+    }
+    template<class K>
+    size_type erase(const K& key) { // Hashes the key and delegates to the overload taking a precomputed hash.
+        return erase(key, hash_key(key));
+    }
+    /**
+     * Erases the element matching `key`, using the caller-supplied `hash` for the lookup.
+     * Returns the number of erased elements: 1 when the key was found, 0 otherwise.
+     */
+    template<class K>
+    size_type erase(const K& key, std::size_t hash) {
+        auto found = find(key, hash);
+        if(found == end()) {
+            return 0;
+        }
+
+        erase_from_bucket(found);
+        m_try_skrink_on_next_insert = true;
+        return 1;
+    }
+    /**
+     * Exchanges the complete state of this table with `other`: the three base sub-objects
+     * (hash, key equality, growth policy), the bucket storage and all bookkeeping members.
+     */
+    void swap(robin_hash& other) {
+        using std::swap;
+
+        // Base sub-objects.
+        swap(static_cast<Hash&>(*this), static_cast<Hash&>(other));
+        swap(static_cast<KeyEqual&>(*this), static_cast<KeyEqual&>(other));
+        swap(static_cast<GrowthPolicy&>(*this), static_cast<GrowthPolicy&>(other));
+
+        // Bucket storage and the cached pointer/size that describe it.
+        swap(m_buckets_data, other.m_buckets_data);
+        swap(m_buckets, other.m_buckets);
+        swap(m_bucket_count, other.m_bucket_count);
+
+        // Element count and load-factor settings.
+        swap(m_nb_elements, other.m_nb_elements);
+        swap(m_load_threshold, other.m_load_threshold);
+        swap(m_min_load_factor, other.m_min_load_factor);
+        swap(m_max_load_factor, other.m_max_load_factor);
+
+        // Deferred grow/shrink requests.
+        swap(m_grow_on_next_insert, other.m_grow_on_next_insert);
+        swap(m_try_skrink_on_next_insert, other.m_try_skrink_on_next_insert);
+    }
+    /*
+     * Lookup
+     */
+    template<class K, class U = ValueSelect, typename std::enable_if<has_mapped_type<U>::value>::type* = nullptr>
+    typename U::value_type& at(const K& key) { // Only available when ValueSelect has a mapped type; hashes the key and delegates.
+        return at(key, hash_key(key));
+    }
+    template<class K, class U = ValueSelect, typename std::enable_if<has_mapped_type<U>::value>::type* = nullptr>
+    typename U::value_type& at(const K& key, std::size_t hash) { // Reuses the const overload and casts the constness away from its result.
+        return const_cast<typename U::value_type&>(static_cast<const robin_hash*>(this)->at(key, hash));
+    }
+    template<class K, class U = ValueSelect, typename std::enable_if<has_mapped_type<U>::value>::type* = nullptr>
+    const typename U::value_type& at(const K& key) const { // Const lookup: hashes the key and delegates.
+        return at(key, hash_key(key));
+    }
+    /**
+     * Returns the mapped value stored for `key`, looked up with the caller-supplied `hash`.
+     * Reports std::out_of_range through TSL_RH_THROW_OR_TERMINATE when the key is absent.
+     */
+    template<class K, class U = ValueSelect, typename std::enable_if<has_mapped_type<U>::value>::type* = nullptr>
+    const typename U::value_type& at(const K& key, std::size_t hash) const {
+        auto found = find(key, hash);
+        if(found == cend()) {
+            TSL_RH_THROW_OR_TERMINATE(std::out_of_range, "Couldn't find key.");
+        }
+
+        return found.value();
+    }
+    template<class K, class U = ValueSelect, typename std::enable_if<has_mapped_type<U>::value>::type* = nullptr>
+    typename U::value_type& operator[](K&& key) { // try_emplace with no mapped-value arguments, then returns the mapped value found or created.
+        return try_emplace(std::forward<K>(key)).first.value();
+    }
+    template<class K>
+    size_type count(const K& key) const { // Hashes the key and delegates to the overload taking a precomputed hash.
+        return count(key, hash_key(key));
+    }
+    /**
+     * Number of elements matching `key`: 1 when find() locates it, 0 otherwise.
+     */
+    template<class K>
+    size_type count(const K& key, std::size_t hash) const {
+        const bool present = find(key, hash) != cend();
+        return present?1:0;
+    }
+    template<class K>
+    iterator find(const K& key) { // Hashes the key, then searches; end() is returned when the key is absent.
+        return find_impl(key, hash_key(key));
+    }
+    template<class K>
+    iterator find(const K& key, std::size_t hash) { // Same search with a caller-supplied hash, which is used as-is.
+        return find_impl(key, hash);
+    }
+    template<class K>
+    const_iterator find(const K& key) const { // Const search: hashes the key, then searches; cend() is returned when the key is absent.
+        return find_impl(key, hash_key(key));
+    }
+    template<class K>
+    const_iterator find(const K& key, std::size_t hash) const { // Const search with a caller-supplied hash, which is used as-is.
+        return find_impl(key, hash);
+    }
+    template<class K>
+    std::pair<iterator, iterator> equal_range(const K& key) { // Hashes the key and delegates to the overload taking a precomputed hash.
+        return equal_range(key, hash_key(key));
+    }
+    /**
+     * Range of elements matching `key`. It holds at most one element: {it, next(it)} when
+     * the key is found, {end(), end()} otherwise.
+     */
+    template<class K>
+    std::pair<iterator, iterator> equal_range(const K& key, std::size_t hash) {
+        const iterator lower = find(key, hash);
+        const iterator upper = (lower == end())?lower:std::next(lower);
+
+        return std::make_pair(lower, upper);
+    }
+    template<class K>
+    std::pair<const_iterator, const_iterator> equal_range(const K& key) const { // Const variant: hashes the key and delegates.
+        return equal_range(key, hash_key(key));
+    }
+    /**
+     * Const range of elements matching `key`. It holds at most one element: {it, next(it)}
+     * when the key is found, {cend(), cend()} otherwise.
+     */
+    template<class K>
+    std::pair<const_iterator, const_iterator> equal_range(const K& key, std::size_t hash) const {
+        const const_iterator lower = find(key, hash);
+        const const_iterator upper = (lower == cend())?lower:std::next(lower);
+
+        return std::make_pair(lower, upper);
+    }
+    /*
+     * Bucket interface 
+     */
+    size_type bucket_count() const { // Returns the cached bucket count instead of querying the container.
+        return m_bucket_count; 
+    }
+    size_type max_bucket_count() const { // The smaller of the growth policy's limit and the container's max_size().
+        return std::min(GrowthPolicy::max_bucket_count(), m_buckets_data.max_size());
+    }
+    /*
+     * Hash policy 
+     */
+    /**
+     * Ratio of stored elements to buckets; 0 for a table without any bucket.
+     */
+    float load_factor() const {
+        const size_type nb_buckets = bucket_count();
+        return (nb_buckets == 0)?0.0f:float(m_nb_elements)/float(nb_buckets);
+    }
+    float min_load_factor() const { // Current (already clamped) minimum load factor.
+        return m_min_load_factor;
+    }
+    float max_load_factor() const { // Current (already clamped) maximum load factor.
+        return m_max_load_factor;
+    }
+    void min_load_factor(float ml) { // Stores `ml` clamped to [MINIMUM_MIN_LOAD_FACTOR, MAXIMUM_MIN_LOAD_FACTOR].
+        m_min_load_factor = clamp(ml, float(MINIMUM_MIN_LOAD_FACTOR), 
+                                      float(MAXIMUM_MIN_LOAD_FACTOR));
+    }
+    /**
+     * Stores `ml` clamped to [MINIMUM_MAX_LOAD_FACTOR, MAXIMUM_MAX_LOAD_FACTOR] and
+     * recomputes the element count threshold for the current number of buckets.
+     */
+    void max_load_factor(float ml) {
+        const float lowest = float(MINIMUM_MAX_LOAD_FACTOR);
+        const float highest = float(MAXIMUM_MAX_LOAD_FACTOR);
+
+        m_max_load_factor = clamp(ml, lowest, highest);
+        m_load_threshold = size_type(float(bucket_count())*m_max_load_factor);
+    }
+    /**
+     * Rebuilds the table with `count` buckets, raised if needed to the smallest count
+     * that holds the current elements under max_load_factor().
+     */
+    void rehash(size_type count) {
+        const size_type min_count = size_type(std::ceil(float(size())/max_load_factor()));
+        rehash_impl(std::max(count, min_count));
+    }
+    void reserve(size_type count) { // Converts an element count into a bucket count (count / max_load_factor, rounded up) and rehashes.
+        rehash(size_type(std::ceil(float(count)/max_load_factor())));
+    }    
+    /*
+     * Observers
+     */
+    hasher hash_function() const { // Copy of the hash functor, which is stored as a base class.
+        return static_cast<const Hash&>(*this);
+    }
+    key_equal key_eq() const { // Copy of the key-equality functor, which is stored as a base class.
+        return static_cast<const KeyEqual&>(*this);
+    }
+    /*
+     * Other
+     */    
+    iterator mutable_iterator(const_iterator pos) { // Converts a const_iterator into an iterator on the same bucket via const_cast.
+        return iterator(const_cast<bucket_entry*>(pos.m_bucket));
+    }
+private:
+    template<class K>
+    std::size_t hash_key(const K& key) const { // Calls the Hash base class on the key.
+        return Hash::operator()(key);
+    }
+    template<class K1, class K2>
+    bool compare_keys(const K1& key1, const K2& key2) const { // Calls the KeyEqual base class on the two keys.
+        return KeyEqual::operator()(key1, key2);
+    }
+    std::size_t bucket_for_hash(std::size_t hash) const { // Maps a hash to its bucket index through the growth policy.
+        const std::size_t bucket = GrowthPolicy::bucket_for_hash(hash);
+        tsl_rh_assert(bucket < m_bucket_count || (bucket == 0 && m_bucket_count == 0)); // index 0 is also accepted for a table without buckets
+        return bucket;
+    }
+    template<class U = GrowthPolicy, typename std::enable_if<is_power_of_two_policy<U>::value>::type* = nullptr>
+    std::size_t next_bucket(std::size_t index) const noexcept { // Power-of-two policies only: index of the following bucket, wrapped with the policy mask.
+        tsl_rh_assert(index < bucket_count());
+        return (index + 1) & this->m_mask;
+    }
+    /**
+     * Index of the bucket following `index` for growth policies that are not power-of-two
+     * based; wraps around to 0 after the last bucket.
+     */
+    template<class U = GrowthPolicy, typename std::enable_if<!is_power_of_two_policy<U>::value>::type* = nullptr>
+    std::size_t next_bucket(std::size_t index) const noexcept {
+        tsl_rh_assert(index < bucket_count());
+
+        const std::size_t following = index + 1;
+        if(following == bucket_count()) {
+            return 0;
+        }
+        return following;
+    }
+    template<class K>
+    iterator find_impl(const K& key, std::size_t hash) { // Runs the const find() and converts its result to a mutable iterator.
+        return mutable_iterator(static_cast<const robin_hash*>(this)->find(key, hash));
+    }
+    /**
+     * Probes the buckets starting at the ideal bucket of `hash`. The search stops as soon as
+     * the number of probes exceeds the distance recorded in the inspected bucket: the key
+     * cannot be stored further away. Returns cend() when the key is not found.
+     *
+     * The stored hash is only compared first when USE_STORED_HASH_ON_LOOKUP is set.
+     */
+    template<class K>
+    const_iterator find_impl(const K& key, std::size_t hash) const {
+        std::size_t slot = bucket_for_hash(hash);
+
+        for(distance_type probes = 0; probes <= m_buckets[slot].dist_from_ideal_bucket(); probes++) {
+            const bool hash_matches = !USE_STORED_HASH_ON_LOOKUP || m_buckets[slot].bucket_hash_equal(hash);
+            if(TSL_RH_LIKELY(hash_matches && compare_keys(KeySelect()(m_buckets[slot].value()), key))) {
+                return const_iterator(m_buckets + slot);
+            }
+
+            slot = next_bucket(slot);
+        }
+
+        return cend();
+    }
+    void erase_from_bucket(iterator pos) { // Clears the bucket at `pos`, decrements the element count and closes the gap left behind.
+        pos.m_bucket->clear();
+        m_nb_elements--;
+        /**
+         * Backward shift, swap the empty bucket, previous_ibucket, with the values on its right, ibucket,
+         * until we cross another empty bucket or if the other bucket has a distance_from_ideal_bucket == 0.
+         * 
+         * We try to move the values closer to their ideal bucket.
+         *
+         * Each shifted value moves back by exactly one bucket, so its stored distance is
+         * decremented by one. next_bucket() wraps around, so the shift may continue past the
+         * end of the bucket array.
+         */
+        std::size_t previous_ibucket = static_cast<std::size_t>(pos.m_bucket - m_buckets);
+        std::size_t ibucket = next_bucket(previous_ibucket);
+        while(m_buckets[ibucket].dist_from_ideal_bucket() > 0) {
+            tsl_rh_assert(m_buckets[previous_ibucket].empty());
+            const distance_type new_distance = distance_type(m_buckets[ibucket].dist_from_ideal_bucket() - 1);
+            m_buckets[previous_ibucket].set_value_of_empty_bucket(new_distance, m_buckets[ibucket].truncated_hash(), 
+                                                                  std::move(m_buckets[ibucket].value()));
+            m_buckets[ibucket].clear();
+            previous_ibucket = ibucket;
+            ibucket = next_bucket(ibucket);
+        }
+    }
+    template<class K, class... Args>
+    std::pair<iterator, bool> insert_impl(const K& key, Args&&... value_type_args) { // Returns {iterator on the element, true if it was inserted / false if the key already existed}.
+        const std::size_t hash = hash_key(key);
+        std::size_t ibucket = bucket_for_hash(hash); 
+        distance_type dist_from_ideal_bucket = 0;
+        while(dist_from_ideal_bucket <= m_buckets[ibucket].dist_from_ideal_bucket()) { // same probing as find_impl: look for an existing equal key
+            if((!USE_STORED_HASH_ON_LOOKUP || m_buckets[ibucket].bucket_hash_equal(hash)) &&
+               compare_keys(KeySelect()(m_buckets[ibucket].value()), key)) 
+            {
+                return std::make_pair(iterator(m_buckets + ibucket), false); // value_type_args are left untouched on this path
+            }
+            ibucket = next_bucket(ibucket);
+            dist_from_ideal_bucket++;
+        }
+        if(rehash_on_extreme_load()) { // the table was rebuilt, so the insertion position has to be searched again
+            ibucket = bucket_for_hash(hash);
+            dist_from_ideal_bucket = 0;
+            while(dist_from_ideal_bucket <= m_buckets[ibucket].dist_from_ideal_bucket()) {
+                ibucket = next_bucket(ibucket);
+                dist_from_ideal_bucket++;
+            }
+        }
+        if(m_buckets[ibucket].empty()) {
+            m_buckets[ibucket].set_value_of_empty_bucket(dist_from_ideal_bucket, bucket_entry::truncate_hash(hash),
+                                                         std::forward<Args>(value_type_args)...);
+        }
+        else {
+            insert_value(ibucket, dist_from_ideal_bucket, bucket_entry::truncate_hash(hash), 
+                         std::forward<Args>(value_type_args)...);
+        }
+        m_nb_elements++;
+        /*
+         * The value will be inserted in ibucket in any case, either because it was
+         * empty or by stealing the bucket (robin hood). 
+         */
+        return std::make_pair(iterator(m_buckets + ibucket), true);
+    }
+    template<class... Args>
+    void insert_value(std::size_t ibucket, distance_type dist_from_ideal_bucket, 
+                      truncated_hash_type hash, Args&&... value_type_args) 
+    {
+        value_type value(std::forward<Args>(value_type_args)...); // build the value locally: insert_value_impl needs an lvalue to swap with
+        insert_value_impl(ibucket, dist_from_ideal_bucket, hash, value);
+    }
+    void insert_value(std::size_t ibucket, distance_type dist_from_ideal_bucket,
+                      truncated_hash_type hash, value_type&& value)
+    {
+        insert_value_impl(ibucket, dist_from_ideal_bucket, hash, value); // `value` is a named rvalue reference, so it is passed on as an lvalue
+    }
+    /*
+     * We don't use `value_type&& value` as last argument due to a bug in MSVC when `value_type` is a pointer,
+     * The compiler is not able to see the difference between `std::string*` and `std::string*&&` resulting in 
+     * compile error.
+     * 
+     * The `value` will be in a moved state at the end of the function.
+     *
+     * The new value takes the occupied bucket `ibucket`; the value it displaces is carried
+     * forward and swapped again with any value that is closer to its ideal bucket than the
+     * carried one, until an empty bucket is reached.
+     */
+    void insert_value_impl(std::size_t ibucket, distance_type dist_from_ideal_bucket,
+                           truncated_hash_type hash, value_type& value)
+    {
+        m_buckets[ibucket].swap_with_value_in_bucket(dist_from_ideal_bucket, hash, value);
+        ibucket = next_bucket(ibucket);
+        dist_from_ideal_bucket++;
+        while(!m_buckets[ibucket].empty()) {
+            if(dist_from_ideal_bucket > m_buckets[ibucket].dist_from_ideal_bucket()) {
+                if(dist_from_ideal_bucket >= REHASH_ON_HIGH_NB_PROBES__NPROBES && 
+                   load_factor() >= REHASH_ON_HIGH_NB_PROBES__MIN_LOAD_FACTOR) 
+                {
+                    /**
+                     * The number of probes is really high, rehash the map on the next insert.
+                     * Difficult to do now as rehash may throw an exception.
+                     */
+                    m_grow_on_next_insert = true;
+                }
+                m_buckets[ibucket].swap_with_value_in_bucket(dist_from_ideal_bucket, hash, value);
+            }
+            ibucket = next_bucket(ibucket);
+            dist_from_ideal_bucket++;
+        }
+        m_buckets[ibucket].set_value_of_empty_bucket(dist_from_ideal_bucket, hash, std::move(value));
+    }
+    void rehash_impl(size_type count) { // Builds a new table with `count` buckets, moves every value into it, then swaps it with *this.
+        robin_hash new_table(count, static_cast<Hash&>(*this), static_cast<KeyEqual&>(*this), 
+                             get_allocator(), m_min_load_factor, m_max_load_factor);
+        const bool use_stored_hash = USE_STORED_HASH_ON_REHASH(new_table.bucket_count());
+        for(auto& bucket: m_buckets_data) {
+            if(bucket.empty()) { 
+                continue; 
+            }
+            const std::size_t hash = use_stored_hash?bucket.truncated_hash(): // reuse the stored hash when allowed, otherwise hash the key again
+                                                     new_table.hash_key(KeySelect()(bucket.value()));
+            new_table.insert_value_on_rehash(new_table.bucket_for_hash(hash), 0, 
+                                             bucket_entry::truncate_hash(hash), std::move(bucket.value()));
+        }
+        new_table.m_nb_elements = m_nb_elements; // insert_value_on_rehash does not update the element count
+        new_table.swap(*this);
+    }
+    /**
+     * Places `value` in the table while rehashing, starting at `ibucket`. No key comparison
+     * is done. Whenever the carried value is further from its ideal bucket than the value
+     * stored in the inspected bucket, it either takes the bucket (if empty, which ends the
+     * insertion) or is swapped with the stored value, which is then carried forward.
+     *
+     * The element count is not modified here.
+     */
+    void insert_value_on_rehash(std::size_t ibucket, distance_type dist_from_ideal_bucket, 
+                                truncated_hash_type hash, value_type&& value) 
+    {
+        for(;; dist_from_ideal_bucket++, ibucket = next_bucket(ibucket)) {
+            bucket_entry& slot = m_buckets[ibucket];
+
+            if(dist_from_ideal_bucket <= slot.dist_from_ideal_bucket()) {
+                continue;
+            }
+
+            if(slot.empty()) {
+                slot.set_value_of_empty_bucket(dist_from_ideal_bucket, hash, std::move(value));
+                return;
+            }
+
+            slot.swap_with_value_in_bucket(dist_from_ideal_bucket, hash, value);
+        }
+    }
+    /**
+     * Called before an insertion to resize the table when its load is out of bounds.
+     *
+     * Grows to the policy's next bucket count when m_grow_on_next_insert is set or the
+     * load threshold has been reached. Otherwise, if an erase occurred since the last check
+     * (m_try_skrink_on_next_insert) and the load factor is below a non-zero min_load_factor,
+     * shrinks the table to fit size() + 1 elements.
+     * 
+     * Returns true if the table has been rehashed.
+     */
+    bool rehash_on_extreme_load() {
+        const bool must_grow = m_grow_on_next_insert || size() >= m_load_threshold;
+        if(must_grow) {
+            rehash_impl(GrowthPolicy::next_bucket_count());
+            m_grow_on_next_insert = false;
+            return true;
+        }
+
+        if(!m_try_skrink_on_next_insert) {
+            return false;
+        }
+
+        // The shrink request is consumed whether or not the table ends up being shrunk.
+        m_try_skrink_on_next_insert = false;
+        if(m_min_load_factor != 0.0f && load_factor() < m_min_load_factor) {
+            reserve(size() + 1);
+            return true;
+        }
+
+        return false;
+    }
+public:
+    static const size_type DEFAULT_INIT_BUCKETS_SIZE = 0; // NOTE(review): presumably the default bucket count used by the map/set front-ends; confirm against callers
+    static constexpr float DEFAULT_MAX_LOAD_FACTOR = 0.5f; // default argument of the constructor
+    static constexpr float MINIMUM_MAX_LOAD_FACTOR = 0.2f; // lower clamp bound used by max_load_factor(float)
+    static constexpr float MAXIMUM_MAX_LOAD_FACTOR = 0.95f; // upper clamp bound used by max_load_factor(float)
+    static constexpr float DEFAULT_MIN_LOAD_FACTOR = 0.0f; // default argument of the constructor; 0 disables shrinking in rehash_on_extreme_load()
+    static constexpr float MINIMUM_MIN_LOAD_FACTOR = 0.0f; // lower clamp bound used by min_load_factor(float)
+    static constexpr float MAXIMUM_MIN_LOAD_FACTOR = 0.15f; // upper clamp bound used by min_load_factor(float)
+    static_assert(MINIMUM_MAX_LOAD_FACTOR < MAXIMUM_MAX_LOAD_FACTOR, 
+                  "MINIMUM_MAX_LOAD_FACTOR should be < MAXIMUM_MAX_LOAD_FACTOR");
+    static_assert(MINIMUM_MIN_LOAD_FACTOR < MAXIMUM_MIN_LOAD_FACTOR, 
+                  "MINIMUM_MIN_LOAD_FACTOR should be < MAXIMUM_MIN_LOAD_FACTOR");
+    static_assert(MAXIMUM_MIN_LOAD_FACTOR < MINIMUM_MAX_LOAD_FACTOR, 
+                  "MAXIMUM_MIN_LOAD_FACTOR should be < MINIMUM_MAX_LOAD_FACTOR");
+private:
+    static const distance_type REHASH_ON_HIGH_NB_PROBES__NPROBES = 128; // probe distance from which insert_value_impl may request a grow
+    static constexpr float REHASH_ON_HIGH_NB_PROBES__MIN_LOAD_FACTOR = 0.15f; // the grow request is only made at or above this load factor
+    /**
+     * Return an always valid pointer to a static empty bucket_entry with last_bucket() == true.
+     * The bucket is a function-local static, so the same pointer is returned on every call.
+     */            
+    bucket_entry* static_empty_bucket_ptr() {
+        static bucket_entry empty_bucket(true);
+        return &empty_bucket;
+    }
+private:
+    buckets_container_type m_buckets_data;
+    /**
+     * Points to m_buckets_data.data() if !m_buckets_data.empty() otherwise points to static_empty_bucket_ptr.
+     * This variable is useful to avoid the cost of checking if m_buckets_data is empty when trying 
+     * to find an element.
+     * 
+     * TODO Remove m_buckets_data and only use a pointer instead of a pointer+vector to save some space in the robin_hash object.
+     * Manage the Allocator manually.
+     */
+    bucket_entry* m_buckets;
+    /**
+     * Used a lot in find, avoid the call to m_buckets_data.size() which is a bit slower.
+     */
+    size_type m_bucket_count;
+    size_type m_nb_elements;
+    size_type m_load_threshold;
+    float m_max_load_factor;
+    bool m_grow_on_next_insert;
+    float m_min_load_factor;
+    /**
+     * We can't shrink down the map on erase operations as the erase methods need to return the next iterator.
+     * Shrinking the map would invalidate all the iterators and we could not return the next iterator in a meaningful way.
+     * On erase, we thus only record that we should try to shrink the hash table on the next insert
+     * if we go below the min_load_factor. 
+     */
+    bool m_try_skrink_on_next_insert;
+};
+}
+}
+#endif
--- a/include/tsl/robin_map.h
+++ b/include/tsl/robin_map.h
+/**
+ * MIT License
+ * 
+ * Copyright (c) 2017 Tessil
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ * 
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef TSL_ROBIN_MAP_H
+#define TSL_ROBIN_MAP_H 
+#include <cstddef>
+#include <functional>
+#include <initializer_list>
+#include <memory>
+#include <type_traits>
+#include <utility>
+#include "robin_hash.h"
+namespace tsl {
+/**
+ * Implementation of a hash map using open-addressing and the robin hood hashing algorithm with backward shift deletion.
+ * 
+ * For operations modifying the hash map (insert, erase, rehash, ...), the strong exception guarantee 
+ * is only guaranteed when the expression `std::is_nothrow_swappable<std::pair<Key, T>>::value &&
+ * std::is_nothrow_move_constructible<std::pair<Key, T>>::value` is true, otherwise if an exception
+ * is thrown during the swap or the move, the hash map may end up in an undefined state. Per the standard
+ * a `Key` or `T` with a noexcept copy constructor and no move constructor also satisfies the 
+ * `std::is_nothrow_move_constructible<std::pair<Key, T>>::value` criterion (and will thus guarantee the 
+ * strong exception for the map).
+ * 
+ * When `StoreHash` is true, 32 bits of the hash are stored alongside the values. It can improve 
+ * the performance during lookups if the `KeyEqual` function takes time (if it engenders a cache-miss for example) 
+ * as we then compare the stored hashes before comparing the keys. When `tsl::rh::power_of_two_growth_policy` is used
+ * as `GrowthPolicy`, it may also speed up the rehash process as we can avoid recalculating the hash. 
+ * When it is detected that storing the hash will not incur any memory penalty due to alignment (i.e. 
+ * `sizeof(tsl::detail_robin_hash::bucket_entry<ValueType, true>) == 
+ * sizeof(tsl::detail_robin_hash::bucket_entry<ValueType, false>)`) and `tsl::rh::power_of_two_growth_policy` is
+ * used, the hash will be stored even if `StoreHash` is false so that we can speed-up the rehash (but it will
+ * not be used on lookups unless `StoreHash` is true).
+ * 
+ * `GrowthPolicy` defines how the map grows and consequently how a hash value is mapped to a bucket. 
+ * By default the map uses `tsl::rh::power_of_two_growth_policy`. This policy keeps the number of buckets 
+ * to a power of two and uses a mask to map the hash to a bucket instead of the slow modulo.
+ * Other growth policies are available and you may define your own growth policy, 
+ * check `tsl::rh::power_of_two_growth_policy` for the interface.
+ * 
+ * `std::pair<Key, T>` must be swappable.
+ * 
+ * `Key` and `T` must be copy and/or move constructible.
+ * 
+ * If the destructor of `Key` or `T` throws an exception, the behaviour of the class is undefined.
+ * 
+ * Iterators invalidation:
+ *  - clear, operator=, reserve, rehash: always invalidate the iterators.
+ *  - insert, emplace, emplace_hint, operator[]: if there is an effective insert, invalidate the iterators.
+ *  - erase: always invalidate the iterators.
+ */
+template<class Key, 
+         class T, 
+         class Hash = std::hash<Key>,
+         class KeyEqual = std::equal_to<Key>,
+         class Allocator = std::allocator<std::pair<Key, T>>,
+         bool StoreHash = false,
+         class GrowthPolicy = tsl::rh::power_of_two_growth_policy<2>>
+class robin_map { // Thin wrapper around detail_robin_hash::robin_hash; the public members operate on the wrapped table `m_ht`.
+private:
+    template<typename U>
+    using has_is_transparent = tsl::detail_robin_hash::has_is_transparent<U>;
+    class KeySelect { // Functor handed to robin_hash: returns the key (`first`) of a stored std::pair<Key, T>.
+    public:
+        using key_type = Key;
+        const key_type& operator()(const std::pair<Key, T>& key_value) const noexcept {
+            return key_value.first;
+        }
+        key_type& operator()(std::pair<Key, T>& key_value) noexcept {
+            return key_value.first;
+        }
+    };  
+    class ValueSelect { // Functor handed to robin_hash: returns the mapped value (`second`) of a stored std::pair<Key, T>.
+    public:
+        using value_type = T;
+        const value_type& operator()(const std::pair<Key, T>& key_value) const noexcept {
+            return key_value.second;
+        }
+        value_type& operator()(std::pair<Key, T>& key_value) noexcept {
+            return key_value.second;
+        }
+    };
+    using ht = detail_robin_hash::robin_hash<std::pair<Key, T>, KeySelect, ValueSelect,
+                                             Hash, KeyEqual, Allocator, StoreHash, GrowthPolicy>;  
+public:
+    using key_type = typename ht::key_type;
+    using mapped_type = T;
+    using value_type = typename ht::value_type;
+    using size_type = typename ht::size_type;
+    using difference_type = typename ht::difference_type;
+    using hasher = typename ht::hasher;
+    using key_equal = typename ht::key_equal;
+    using allocator_type = typename ht::allocator_type;
+    using reference = typename ht::reference;
+    using const_reference = typename ht::const_reference;
+    using pointer = typename ht::pointer;
+    using const_pointer = typename ht::const_pointer;
+    using iterator = typename ht::iterator;
+    using const_iterator = typename ht::const_iterator;
+public:
+    /*
+     * Constructors
+     *
+     * Every constructor delegates, directly or through a sibling overload, to the
+     * (bucket_count, hash, equal, alloc) constructor, which builds the wrapped table.
+     * The iterator-range and initializer_list overloads then insert the given elements.
+     * When no bucket count is given, ht::DEFAULT_INIT_BUCKETS_SIZE is used.
+     */
+    robin_map(): robin_map(ht::DEFAULT_INIT_BUCKETS_SIZE) {
+    }
+    explicit robin_map(size_type bucket_count, 
+                       const Hash& hash = Hash(),
+                       const KeyEqual& equal = KeyEqual(),
+                       const Allocator& alloc = Allocator()): 
+                m_ht(bucket_count, hash, equal, alloc)
+    {
+    }
+    robin_map(size_type bucket_count,
+              const Allocator& alloc): robin_map(bucket_count, Hash(), KeyEqual(), alloc)
+    {
+    }
+    robin_map(size_type bucket_count,
+              const Hash& hash,
+              const Allocator& alloc): robin_map(bucket_count, hash, KeyEqual(), alloc)
+    {
+    }
+    explicit robin_map(const Allocator& alloc): robin_map(ht::DEFAULT_INIT_BUCKETS_SIZE, alloc) {
+    }
+    template<class InputIt>
+    robin_map(InputIt first, InputIt last,
+              size_type bucket_count = ht::DEFAULT_INIT_BUCKETS_SIZE,
+              const Hash& hash = Hash(),
+              const KeyEqual& equal = KeyEqual(),
+              const Allocator& alloc = Allocator()): robin_map(bucket_count, hash, equal, alloc)
+    {
+        insert(first, last);
+    }
+    template<class InputIt>
+    robin_map(InputIt first, InputIt last,
+              size_type bucket_count,
+              const Allocator& alloc): robin_map(first, last, bucket_count, Hash(), KeyEqual(), alloc)
+    {
+    }
+    template<class InputIt>
+    robin_map(InputIt first, InputIt last,
+              size_type bucket_count,
+              const Hash& hash,
+              const Allocator& alloc): robin_map(first, last, bucket_count, hash, KeyEqual(), alloc)
+    {
+    }
+    robin_map(std::initializer_list<value_type> init,
+              size_type bucket_count = ht::DEFAULT_INIT_BUCKETS_SIZE,
+              const Hash& hash = Hash(),
+              const KeyEqual& equal = KeyEqual(),
+              const Allocator& alloc = Allocator()): 
+          robin_map(init.begin(), init.end(), bucket_count, hash, equal, alloc)
+    {
+    }
+    robin_map(std::initializer_list<value_type> init,
+              size_type bucket_count,
+              const Allocator& alloc): 
+          robin_map(init.begin(), init.end(), bucket_count, Hash(), KeyEqual(), alloc)
+    {
+    }
+    robin_map(std::initializer_list<value_type> init,
+              size_type bucket_count,
+              const Hash& hash,
+              const Allocator& alloc): 
+          robin_map(init.begin(), init.end(), bucket_count, hash, KeyEqual(), alloc)
+    {
+    }
+    robin_map& operator=(std::initializer_list<value_type> ilist) { // Replaces the contents: clear, reserve for ilist.size() elements, then insert them.
+        m_ht.clear();
+        m_ht.reserve(ilist.size());
+        m_ht.insert(ilist.begin(), ilist.end());
+        return *this;
+    }
+    allocator_type get_allocator() const { return m_ht.get_allocator(); }
+    /*
+     * Iterators
+     *
+     * Forwarded to the same-named members of the wrapped table.
+     */
+    iterator begin() noexcept { return m_ht.begin(); }
+    const_iterator begin() const noexcept { return m_ht.begin(); }
+    const_iterator cbegin() const noexcept { return m_ht.cbegin(); }
+    iterator end() noexcept { return m_ht.end(); }
+    const_iterator end() const noexcept { return m_ht.end(); }
+    const_iterator cend() const noexcept { return m_ht.cend(); }
+    /*
+     * Capacity
+     *
+     * Forwarded to the same-named members of the wrapped table.
+     */
+    bool empty() const noexcept { return m_ht.empty(); }
+    size_type size() const noexcept { return m_ht.size(); }
+    size_type max_size() const noexcept { return m_ht.max_size(); }
+    /*
+     * Modifiers
+     *
+     * The `insert` overloads taking a forwarding reference `P&&` are only enabled when
+     * `value_type` is constructible from `P&&`; they forward to emplace/emplace_hint of the
+     * wrapped table. Overloads taking a `hint` forward to the *_hint members of the table.
+     */
+    void clear() noexcept { m_ht.clear(); }
+    std::pair<iterator, bool> insert(const value_type& value) { 
+        return m_ht.insert(value); 
+    }
+    template<class P, typename std::enable_if<std::is_constructible<value_type, P&&>::value>::type* = nullptr>
+    std::pair<iterator, bool> insert(P&& value) { 
+        return m_ht.emplace(std::forward<P>(value)); 
+    }
+    std::pair<iterator, bool> insert(value_type&& value) { 
+        return m_ht.insert(std::move(value)); 
+    }
+    iterator insert(const_iterator hint, const value_type& value) { 
+        return m_ht.insert_hint(hint, value); 
+    }
+    template<class P, typename std::enable_if<std::is_constructible<value_type, P&&>::value>::type* = nullptr>
+    iterator insert(const_iterator hint, P&& value) { 
+        return m_ht.emplace_hint(hint, std::forward<P>(value));
+    }
+    iterator insert(const_iterator hint, value_type&& value) { 
+        return m_ht.insert_hint(hint, std::move(value)); 
+    }
+    template<class InputIt>
+    void insert(InputIt first, InputIt last) { 
+        m_ht.insert(first, last); 
+    }
+    void insert(std::initializer_list<value_type> ilist) { 
+        m_ht.insert(ilist.begin(), ilist.end()); 
+    }
+    template<class M>
+    std::pair<iterator, bool> insert_or_assign(const key_type& k, M&& obj) { 
+        return m_ht.insert_or_assign(k, std::forward<M>(obj)); 
+    }
+    template<class M>
+    std::pair<iterator, bool> insert_or_assign(key_type&& k, M&& obj) { 
+        return m_ht.insert_or_assign(std::move(k), std::forward<M>(obj)); 
+    }
+    template<class M>
+    iterator insert_or_assign(const_iterator hint, const key_type& k, M&& obj) {
+        return m_ht.insert_or_assign(hint, k, std::forward<M>(obj));
+    }
+    template<class M>
+    iterator insert_or_assign(const_iterator hint, key_type&& k, M&& obj) {
+        return m_ht.insert_or_assign(hint, std::move(k), std::forward<M>(obj));
+    }
+    /**
+     * Due to the way elements are stored, emplace will need to move or copy the key-value once.
+     * The method is equivalent to insert(value_type(std::forward<Args>(args)...));
+     * 
+     * Mainly here for compatibility with the std::unordered_map interface.
+     */
+    template<class... Args>
+    std::pair<iterator, bool> emplace(Args&&... args) { 
+        return m_ht.emplace(std::forward<Args>(args)...); 
+    }
+    /**
+     * Due to the way elements are stored, emplace_hint will need to move or copy the key-value once.
+     * The method is equivalent to insert(hint, value_type(std::forward<Args>(args)...));
+     * 
+     * Mainly here for compatibility with the std::unordered_map interface.
+     */
+    template<class... Args>
+    iterator emplace_hint(const_iterator hint, Args&&... args) {
+        return m_ht.emplace_hint(hint, std::forward<Args>(args)...);
+    }
+    template<class... Args>
+    std::pair<iterator, bool> try_emplace(const key_type& k, Args&&... args) { 
+        return m_ht.try_emplace(k, std::forward<Args>(args)...);
+    }
+    template<class... Args>
+    std::pair<iterator, bool> try_emplace(key_type&& k, Args&&... args) {
+        return m_ht.try_emplace(std::move(k), std::forward<Args>(args)...);
+    }
+    template<class... Args>
+    iterator try_emplace(const_iterator hint, const key_type& k, Args&&... args) {
+        return m_ht.try_emplace_hint(hint, k, std::forward<Args>(args)...);
+    }
+    template<class... Args>
+    iterator try_emplace(const_iterator hint, key_type&& k, Args&&... args) {
+        return m_ht.try_emplace_hint(hint, std::move(k), std::forward<Args>(args)...);
+    }
+    iterator erase(iterator pos) { return m_ht.erase(pos); }
+    iterator erase(const_iterator pos) { return m_ht.erase(pos); }
+    iterator erase(const_iterator first, const_iterator last) { return m_ht.erase(first, last); }
+    size_type erase(const key_type& key) { return m_ht.erase(key); }
+    /**
+     * Use the hash value 'precalculated_hash' instead of hashing the key. The hash value should be the same
+     * as hash_function()(key). Useful to speed-up the lookup to the value if you already have the hash.
+     */    
+    size_type erase(const key_type& key, std::size_t precalculated_hash) { 
+        return m_ht.erase(key, precalculated_hash); 
+    }
+    /**
+     * This overload only participates in the overload resolution if the typedef KeyEqual::is_transparent exists. 
+     * If so, K must be hashable and comparable to Key.
+     */
+    template<class K, class KE = KeyEqual, typename std::enable_if<has_is_transparent<KE>::value>::type* = nullptr> 
+    size_type erase(const K& key) { return m_ht.erase(key); }
+    /**
+     * @copydoc erase(const K& key)
+     * 
+     * Use the hash value 'precalculated_hash' instead of hashing the key. The hash value should be the same
+     * as hash_function()(key). Useful to speed-up the lookup to the value if you already have the hash.
+     */    
+    template<class K, class KE = KeyEqual, typename std::enable_if<has_is_transparent<KE>::value>::type* = nullptr> 
+    size_type erase(const K& key, std::size_t precalculated_hash) { 
+        return m_ht.erase(key, precalculated_hash); 
+    }
+    void swap(robin_map& other) { other.m_ht.swap(m_ht); } // Swaps the wrapped tables of the two maps.
+    /*
+     * Lookup
+     *
+     * at, operator[], count, find and equal_range forward to the same-named members of the
+     * wrapped table. Overloads with a `precalculated_hash` parameter pass it through unchanged.
+     */
+    T& at(const Key& key) { return m_ht.at(key); }
+    /**
+     * Use the hash value 'precalculated_hash' instead of hashing the key. The hash value should be the same
+     * as hash_function()(key). Useful to speed-up the lookup if you already have the hash.
+     */
+    T& at(const Key& key, std::size_t precalculated_hash) { return m_ht.at(key, precalculated_hash); }
+    const T& at(const Key& key) const { return m_ht.at(key); }
+    /**
+     * @copydoc at(const Key& key, std::size_t precalculated_hash)
+     */
+    const T& at(const Key& key, std::size_t precalculated_hash) const { return m_ht.at(key, precalculated_hash); }
+    /**
+     * This overload only participates in the overload resolution if the typedef KeyEqual::is_transparent exists. 
+     * If so, K must be hashable and comparable to Key.
+     */
+    template<class K, class KE = KeyEqual, typename std::enable_if<has_is_transparent<KE>::value>::type* = nullptr> 
+    T& at(const K& key) { return m_ht.at(key); }
+    /**
+     * @copydoc at(const K& key)
+     * 
+     * Use the hash value 'precalculated_hash' instead of hashing the key. The hash value should be the same
+     * as hash_function()(key). Useful to speed-up the lookup if you already have the hash.
+     */    
+    template<class K, class KE = KeyEqual, typename std::enable_if<has_is_transparent<KE>::value>::type* = nullptr> 
+    T& at(const K& key, std::size_t precalculated_hash) { return m_ht.at(key, precalculated_hash); }
+    /**
+     * @copydoc at(const K& key)
+     */
+    template<class K, class KE = KeyEqual, typename std::enable_if<has_is_transparent<KE>::value>::type* = nullptr> 
+    const T& at(const K& key) const { return m_ht.at(key); }
+    /**
+     * @copydoc at(const K& key, std::size_t precalculated_hash)
+     */    
+    template<class K, class KE = KeyEqual, typename std::enable_if<has_is_transparent<KE>::value>::type* = nullptr> 
+    const T& at(const K& key, std::size_t precalculated_hash) const { return m_ht.at(key, precalculated_hash); }
+    T& operator[](const Key& key) { return m_ht[key]; }    
+    T& operator[](Key&& key) { return m_ht[std::move(key)]; }
+    size_type count(const Key& key) const { return m_ht.count(key); }
+    /**
+     * Use the hash value 'precalculated_hash' instead of hashing the key. The hash value should be the same
+     * as hash_function()(key). Useful to speed-up the lookup if you already have the hash.
+     */
+    size_type count(const Key& key, std::size_t precalculated_hash) const { 
+        return m_ht.count(key, precalculated_hash); 
+    }
+    /**
+     * This overload only participates in the overload resolution if the typedef KeyEqual::is_transparent exists. 
+     * If so, K must be hashable and comparable to Key.
+     */
+    template<class K, class KE = KeyEqual, typename std::enable_if<has_is_transparent<KE>::value>::type* = nullptr> 
+    size_type count(const K& key) const { return m_ht.count(key); }
+    /**
+     * @copydoc count(const K& key) const
+     * 
+     * Use the hash value 'precalculated_hash' instead of hashing the key. The hash value should be the same
+     * as hash_function()(key). Useful to speed-up the lookup if you already have the hash.
+     */     
+    template<class K, class KE = KeyEqual, typename std::enable_if<has_is_transparent<KE>::value>::type* = nullptr> 
+    size_type count(const K& key, std::size_t precalculated_hash) const { return m_ht.count(key, precalculated_hash); }
+    iterator find(const Key& key) { return m_ht.find(key); }
+    /**
+     * Use the hash value 'precalculated_hash' instead of hashing the key. The hash value should be the same
+     * as hash_function()(key). Useful to speed-up the lookup if you already have the hash.
+     */
+    iterator find(const Key& key, std::size_t precalculated_hash) { return m_ht.find(key, precalculated_hash); }
+    const_iterator find(const Key& key) const { return m_ht.find(key); }
+    /**
+     * @copydoc find(const Key& key, std::size_t precalculated_hash)
+     */
+    const_iterator find(const Key& key, std::size_t precalculated_hash) const { 
+        return m_ht.find(key, precalculated_hash); 
+    }
+    /**
+     * This overload only participates in the overload resolution if the typedef KeyEqual::is_transparent exists. 
+     * If so, K must be hashable and comparable to Key.
+     */
+    template<class K, class KE = KeyEqual, typename std::enable_if<has_is_transparent<KE>::value>::type* = nullptr> 
+    iterator find(const K& key) { return m_ht.find(key); }
+    /**
+     * @copydoc find(const K& key)
+     * 
+     * Use the hash value 'precalculated_hash' instead of hashing the key. The hash value should be the same
+     * as hash_function()(key). Useful to speed-up the lookup if you already have the hash.
+     */
+    template<class K, class KE = KeyEqual, typename std::enable_if<has_is_transparent<KE>::value>::type* = nullptr> 
+    iterator find(const K& key, std::size_t precalculated_hash) { return m_ht.find(key, precalculated_hash); }
+    /**
+     * @copydoc find(const K& key)
+     */
+    template<class K, class KE = KeyEqual, typename std::enable_if<has_is_transparent<KE>::value>::type* = nullptr> 
+    const_iterator find(const K& key) const { return m_ht.find(key); }
+    /**
+     * @copydoc find(const K& key)
+     * 
+     * Use the hash value 'precalculated_hash' instead of hashing the key. The hash value should be the same
+     * as hash_function()(key). Useful to speed-up the lookup if you already have the hash.
+     */
+    template<class K, class KE = KeyEqual, typename std::enable_if<has_is_transparent<KE>::value>::type* = nullptr> 
+    const_iterator find(const K& key, std::size_t precalculated_hash) const { 
+        return m_ht.find(key, precalculated_hash); 
+    }
+    std::pair<iterator, iterator> equal_range(const Key& key) { return m_ht.equal_range(key); }
+    /**
+     * Use the hash value 'precalculated_hash' instead of hashing the key. The hash value should be the same
+     * as hash_function()(key). Useful to speed-up the lookup if you already have the hash.
+     */
+    std::pair<iterator, iterator> equal_range(const Key& key, std::size_t precalculated_hash) { 
+        return m_ht.equal_range(key, precalculated_hash); 
+    }
+    std::pair<const_iterator, const_iterator> equal_range(const Key& key) const { return m_ht.equal_range(key); }
+    /**
+     * @copydoc equal_range(const Key& key, std::size_t precalculated_hash)
+     */
+    std::pair<const_iterator, const_iterator> equal_range(const Key& key, std::size_t precalculated_hash) const { 
+        return m_ht.equal_range(key, precalculated_hash); 
+    }
+    /**
+     * This overload only participates in the overload resolution if the typedef KeyEqual::is_transparent exists. 
+     * If so, K must be hashable and comparable to Key.
+     */
+    template<class K, class KE = KeyEqual, typename std::enable_if<has_is_transparent<KE>::value>::type* = nullptr> 
+    std::pair<iterator, iterator> equal_range(const K& key) { return m_ht.equal_range(key); }
+    /**
+     * @copydoc equal_range(const K& key)
+     * 
+     * Use the hash value 'precalculated_hash' instead of hashing the key. The hash value should be the same
+     * as hash_function()(key). Useful to speed-up the lookup if you already have the hash.
+     */
+    template<class K, class KE = KeyEqual, typename std::enable_if<has_is_transparent<KE>::value>::type* = nullptr> 
+    std::pair<iterator, iterator> equal_range(const K& key, std::size_t precalculated_hash) { 
+        return m_ht.equal_range(key, precalculated_hash); 
+    }
+    /**
+     * @copydoc equal_range(const K& key)
+     */
+    template<class K, class KE = KeyEqual, typename std::enable_if<has_is_transparent<KE>::value>::type* = nullptr> 
+    std::pair<const_iterator, const_iterator> equal_range(const K& key) const { return m_ht.equal_range(key); }
+    /**
+     * @copydoc equal_range(const K& key, std::size_t precalculated_hash)
+     */    
+    template<class K, class KE = KeyEqual, typename std::enable_if<has_is_transparent<KE>::value>::type* = nullptr> 
+    std::pair<const_iterator, const_iterator> equal_range(const K& key, std::size_t precalculated_hash) const { 
+        return m_ht.equal_range(key, precalculated_hash); 
+    }
+    /*
+     * Bucket interface 
+     *
+     * Forwarded to the same-named members of the wrapped table.
+     */
+    size_type bucket_count() const { return m_ht.bucket_count(); }
+    size_type max_bucket_count() const { return m_ht.max_bucket_count(); }
+    /*
+     *  Hash policy 
+     *
+     * The no-argument overloads are getters; the overloads taking `ml` are setters.
+     */
+    float load_factor() const { return m_ht.load_factor(); }
+    float min_load_factor() const { return m_ht.min_load_factor(); }
+    float max_load_factor() const { return m_ht.max_load_factor(); }
+    /**
+     * Set the `min_load_factor` to `ml`. When the `load_factor` of the map goes
+     * below `min_load_factor` after some erase operations, the map will be
+     * shrunk when an insertion occurs. The erase method itself never shrinks
+     * the map.
+     * 
+     * The default value of `min_load_factor` is 0.0f, the map never shrinks by default.
+     */
+    void min_load_factor(float ml) { m_ht.min_load_factor(ml); }
+    void max_load_factor(float ml) { m_ht.max_load_factor(ml); }
+    void rehash(size_type count) { m_ht.rehash(count); }
+    void reserve(size_type count) { m_ht.reserve(count); }
+    /*
+     * Observers
+     *
+     * Return the hasher and key-equality objects held by the wrapped table.
+     */
+    hasher hash_function() const { return m_ht.hash_function(); }
+    key_equal key_eq() const { return m_ht.key_eq(); }
+    /*
+     * Other
+     */
+    /**
+     * Convert a const_iterator to an iterator.
+     */
+    iterator mutable_iterator(const_iterator pos) {
+        return m_ht.mutable_iterator(pos);
+    }
+    friend bool operator==(const robin_map& lhs, const robin_map& rhs) { // Equal when sizes match and each key of lhs is found in rhs with a mapped value that does not compare != to it.
+        if(lhs.size() != rhs.size()) {
+            return false;
+        }
+        for(const auto& element_lhs: lhs) {
+            const auto it_element_rhs = rhs.find(element_lhs.first);
+            if(it_element_rhs == rhs.cend() || element_lhs.second != it_element_rhs->second) {
+                return false;
+            }
+        }
+        return true;
+    }
+    friend bool operator!=(const robin_map& lhs, const robin_map& rhs) {
+        return !operator==(lhs, rhs);
+    }
+    friend void swap(robin_map& lhs, robin_map& rhs) { // Non-member swap; calls the member swap.
+        lhs.swap(rhs);
+    }
+private:
+    ht m_ht; // The wrapped robin_hash table that stores the std::pair<Key, T> elements.
+};
+/**
+ * Same as `tsl::robin_map<Key, T, Hash, KeyEqual, Allocator, StoreHash, tsl::rh::prime_growth_policy>`.
+ *
+ * Convenience alias: a `robin_map` whose `GrowthPolicy` is `tsl::rh::prime_growth_policy` rather than
+ * the `tsl::rh::power_of_two_growth_policy<2>` that `robin_map` uses by default. The remaining
+ * template parameters keep the same defaults as `robin_map`.
+ */
+template<class Key, 
+         class T, 
+         class Hash = std::hash<Key>,
+         class KeyEqual = std::equal_to<Key>,
+         class Allocator = std::allocator<std::pair<Key, T>>,
+         bool StoreHash = false>
+using robin_pg_map = robin_map<Key, T, Hash, KeyEqual, Allocator, StoreHash, tsl::rh::prime_growth_policy>;
+} // end namespace tsl
+#endif
--- a/include/utility/timer.h
+++ b/include/utility/timer.h
@@ -14,11 +14,14 @@
 #pragma once
 #include <chrono>
+#ifdef SPCONV_CUDA
 #include <cuda_runtime_api.h>
+#endif
 #include <iostream>
 namespace spconv {
+#ifdef SPCONV_CUDA
 template <typename TimeT = std::chrono::microseconds> struct CudaContextTimer {
  CudaContextTimer() {
    cudaDeviceSynchronize();
@@ -36,6 +39,7 @@ template <typename TimeT = std::chrono::microseconds> struct CudaContextTimer {
 private:
  std::chrono::time_point<std::chrono::steady_clock> mCurTime;
 };
+#endif
 template <typename TimeT = std::chrono::microseconds> struct CPUTimer {
  CPUTimer() { mCurTime = std::chrono::steady_clock::now(); }

--- a/setup.py
+++ b/setup.py
@@ -45,8 +45,16 @@ class CMakeBuild(build_ext):
                      '-DCMAKE_PREFIX_PATH={}'.format(LIBTORCH_ROOT),
                      '-DPYBIND11_PYTHON_VERSION={}'.format(PYTHON_VERSION),
                      '-DSPCONV_BuildTests=OFF',
-                      '-DCMAKE_CUDA_FLAGS="--expt-relaxed-constexpr"'
                      ] #  -arch=sm_61
+        if not torch.cuda.is_available():
+            cmake_args += ['-DSPCONV_BuildCUDA=OFF']
+        else:
+            cuda_flags = ["\"--expt-relaxed-constexpr\""]
+            # must add following flags to use at::Half
+            # but will remove raw half operators.
+            cuda_flags += ["-D__CUDA_NO_HALF_OPERATORS__", "-D__CUDA_NO_HALF_CONVERSIONS__"]
+            cuda_flags += ["-D__CUDA_NO_HALF2_OPERATORS__"] 
+            cmake_args += ['-DCMAKE_CUDA_FLAGS=' + " ".join(cuda_flags)]
        cfg = 'Debug' if self.debug else 'Release'
        assert cfg == "Release", "pytorch ops don't support debug build."
        build_args = ['--config', cfg]

--- a/spconv/conv.py
+++ b/spconv/conv.py
@@ -70,7 +70,7 @@ class SparseConvolution(SparseModule):
                 inverse=False,
                 indice_key=None,
                 fused_bn=False,
-                 use_hash=False):
+                 use_hash=True):
        super(SparseConvolution, self).__init__()
        assert groups == 1
        if not isinstance(kernel_size, (list, tuple)):
@@ -136,7 +136,6 @@ class SparseConvolution(SparseModule):
                out_spatial_shape = ops.get_conv_output_size(
                    spatial_shape, self.kernel_size, self.stride, self.padding,
                    self.dilation)
        else:
            out_spatial_shape = spatial_shape
        # input.update_grid(out_spatial_shape)
@@ -222,7 +221,7 @@ class SparseConv2d(SparseConvolution):
                 groups=1,
                 bias=True,
                 indice_key=None,
-                 use_hash=False):
+                 use_hash=True):
        super(SparseConv2d, self).__init__(
            2,
            in_channels,
@@ -248,7 +247,7 @@ class SparseConv3d(SparseConvolution):
                 groups=1,
                 bias=True,
                 indice_key=None,
-                 use_hash=False):
+                 use_hash=True):
        super(SparseConv3d, self).__init__(
            3,
            in_channels,
@@ -274,7 +273,7 @@ class SparseConv4d(SparseConvolution):
                 groups=1,
                 bias=True,
                 indice_key=None,
-                 use_hash=False):
+                 use_hash=True):
        super(SparseConv4d, self).__init__(
            4,
            in_channels,
@@ -300,7 +299,7 @@ class SparseConvTranspose2d(SparseConvolution):
                 groups=1,
                 bias=True,
                 indice_key=None,
-                 use_hash=False):
+                 use_hash=True):
        super(SparseConvTranspose2d, self).__init__(
            2,
            in_channels,
@@ -327,7 +326,7 @@ class SparseConvTranspose3d(SparseConvolution):
                 groups=1,
                 bias=True,
                 indice_key=None,
-                 use_hash=False):
+                 use_hash=True):
        super(SparseConvTranspose3d, self).__init__(
            3,
            in_channels,
@@ -388,7 +387,7 @@ class SubMConv2d(SparseConvolution):
                 groups=1,
                 bias=True,
                 indice_key=None,
-                 use_hash=False):
+                 use_hash=True):
        super(SubMConv2d, self).__init__(
            2,
            in_channels,
@@ -415,7 +414,7 @@ class SubMConv3d(SparseConvolution):
                 groups=1,
                 bias=True,
                 indice_key=None,
-                 use_hash=False):
+                 use_hash=True):
        super(SubMConv3d, self).__init__(
            3,
            in_channels,
@@ -442,7 +441,7 @@ class SubMConv4d(SparseConvolution):
                 groups=1,
                 bias=True,
                 indice_key=None,
-                 use_hash=False):
+                 use_hash=True):
        super(SubMConv4d, self).__init__(
            4,
            in_channels,

--- a/spconv/ops.py
+++ b/spconv/ops.py
@@ -88,8 +88,10 @@ def get_indice_pairs(indices,
            get_indice_pairs_func = torch.ops.spconv.get_indice_pairs_4d
        else:
            raise NotImplementedError
-        return get_indice_pairs_func(indices, batch_size, out_shape, spatial_shape, ksize,
+        res = get_indice_pairs_func(indices, batch_size, out_shape, spatial_shape, ksize,
                            stride, padding, dilation, out_padding, int(subm), int(transpose), int(use_hash))
+        return res
    else:
        if ndim == 2:
            get_indice_pairs_func = torch.ops.spconv.get_indice_pairs_grid_2d

--- a/spconv/utils/__init__.py
+++ b/spconv/utils/__init__.py
@@ -15,10 +15,13 @@
 import numpy as np
 from spconv import spconv_utils
-from spconv.spconv_utils import (
+from spconv.spconv_utils import (non_max_suppression_cpu, points_to_voxel_3d_np,
-    non_max_suppression, non_max_suppression_cpu, points_to_voxel_3d_np,
    points_to_voxel_3d_np_mean, points_to_voxel_3d_with_filtering,
    rbbox_intersection, rbbox_iou, rotate_non_max_suppression_cpu)
+try:
+    from spconv.spconv_utils import non_max_suppression
+except ImportError:
+    pass
 def points_to_voxel(points,

--- a/src/cuhash/CMakeLists.txt
+++ b/src/cuhash/CMakeLists.txt
+# Shared library containing the CUDA hash table sources.
+add_library(cuhash SHARED hash_functions.cu hash_table.cpp hash_table.cu)
+target_include_directories(cuhash PRIVATE ${ALL_INCLUDE})
+target_link_libraries(cuhash PRIVATE ${ALL_LIBS})
+set_target_properties(cuhash PROPERTIES
+    CUDA_STANDARD 14
+    CXX_STANDARD 14
+    CUDA_SEPARABLE_COMPILATION ON)
+install(TARGETS cuhash DESTINATION lib)
+
+# Optional test executable, built only when SPCONV_BuildTests is enabled.
+if(SPCONV_BuildTests)
+    add_executable(cuhash_test main.cc)
+    target_include_directories(cuhash_test PRIVATE ${ALL_INCLUDE})
+    target_link_libraries(cuhash_test PRIVATE ${ALL_LIBS} cuhash)
+    set_target_properties(cuhash_test PROPERTIES
+        CUDA_STANDARD 14
+        CXX_STANDARD 14
+        CUDA_SEPARABLE_COMPILATION ON)
+    install(TARGETS cuhash_test DESTINATION bin)
+endif()
\ No newline at end of file
--- a/src/hash/debugging.cpp
+++ b/src/hash/debugging.cpp
@@ -15,14 +15,14 @@
 * @brief Debugging/statistics/performance utilities for hash tables.
 */
-#include <hash/debugging.h>
+#include <cuhash/debugging.h>
-#include <hash/definitions.h>
+#include <cuhash/definitions.h>
 #include <algorithm>
 #include <cstring>
-#include <hash/cuda_util.h>
+#include <cuhash/cuda_util.h>
-namespace cudahash {
+namespace cuhash {
 void OutputRetrievalStatistics(const unsigned  n_queries,

--- a/src/hash/debugging.cu
+++ b/src/hash/debugging.cu
@@ -15,14 +15,14 @@
 * @brief Debugging/statistics/performance utilities for hash tables.
 */
-#include <hash/debugging.h>
+#include <cuhash/debugging.h>
-#include <hash/definitions.h>
+#include <cuhash/definitions.h>
-#include <hash/hash_table.cuh>
+#include <cuhash/hash_table.cuh>
 #include <algorithm>
-#include <hash/cuda_util.h>
+#include <cuhash/cuda_util.h>
-namespace cudahash {
+namespace cuhash {
 //! Debugging function: Takes statistics on the hash functions' distribution.
@@ -231,9 +231,9 @@ bool CheckAssignedSameSlot(const unsigned  N,
 void PrintStashContents(const Entry *d_stash) {
-  Entry *stash = new Entry[cudahash::kStashSize];
+  Entry *stash = new Entry[cuhash::kStashSize];
-  CUDA_SAFE_CALL(cudaMemcpy(stash, d_stash, sizeof(Entry) * cudahash::kStashSize, cudaMemcpyDeviceToHost));
+  CUDA_SAFE_CALL(cudaMemcpy(stash, d_stash, sizeof(Entry) * cuhash::kStashSize, cudaMemcpyDeviceToHost));
-  for (unsigned i = 0; i < cudahash::kStashSize; ++i) {
+  for (unsigned i = 0; i < cuhash::kStashSize; ++i) {
    if (get_key(stash[i]) != kKeyEmpty) {
      char buffer[256];
      sprintf(buffer, "Stash[%u]: %u = %u", i, get_key(stash[i]), get_value(stash[i]));

--- a/src/hash/hash_functions.cu
+++ b/src/hash/hash_functions.cu
-#include <hash/hash_table.h>
+#include <cuhash/hash_table.h>
-#include <hash/debugging.h>
+#include <cuhash/debugging.h>
+#include <cassert>
+#include <random>
-#include <hash/mt19937ar.h>
+namespace cuhash {
+std::random_device random_dev;
-#include <cassert>
+std::mt19937 random_engine(random_dev());
+std::uniform_int_distribution<unsigned> uint_distribution;
-namespace cudahash {
+unsigned generate_random_uint32(){  // returns one draw from the default-constructed std::uniform_int_distribution<unsigned> declared above
+  return uint_distribution(random_engine);  // random_engine is the std::mt19937 seeded once from std::random_device; NOTE(review): shared namespace-scope state, no locking visible here
+}
 void GenerateFunctions(const unsigned  N,
                       const unsigned  num_keys,
@@ -19,9 +26,11 @@ void GenerateFunctions(const unsigned  N,
    // Generate a set of hash function constants for this build attempt.
    for (unsigned i = 0 ; i < N; ++i) {
-      unsigned new_a = genrand_int32() % kPrimeDivisor;
+      // uint_distribution(random_engine) % kPrimeDivisor;
+      // genrand_int32() % kPrimeDivisor;
+      unsigned new_a = generate_random_uint32() % kPrimeDivisor;
      constants[i].x = (1 > new_a ? 1 : new_a);
-      constants[i].y = genrand_int32() % kPrimeDivisor;
+      constants[i].y = generate_random_uint32() % kPrimeDivisor;
    }
 #ifdef FORCEFULLY_GENERATE_NO_CYCLES

--- a/src/hash/hash_table.cpp
+++ b/src/hash/hash_table.cpp
@@ -14,20 +14,18 @@
 * @brief Implements a basic hash table that stores one value per key.
 */
-#include <hash/hash_table.h>
+#include <cuhash/hash_table.h>
-#include <hash/debugging.h>
+#include <cuhash/debugging.h>
 #include <algorithm>
 #include <cmath>
 #include <cstdio>
 #include <cstring>
 #include <limits>
-#include <hash/mt19937ar.h>
 #include <cuda_runtime_api.h>
-#include <hash/cuda_util.h>
+#include <cuhash/cuda_util.h>
-namespace cudahash {
+namespace cuhash {
 char buffer[256];
@@ -164,8 +162,8 @@ bool HashTable::Build(const unsigned  n,
        else
            constants_5_.Generate(n, d_keys,table_size_);
-        stash_constants_.x = std::max(1lu, genrand_int32()) % kPrimeDivisor;
+        stash_constants_.x = std::max<unsigned>(1u, generate_random_uint32() % kPrimeDivisor);  // reduce first, then clamp: clamping before the modulo can still yield 0 when the draw is a multiple of kPrimeDivisor
-        stash_constants_.y = genrand_int32() % kPrimeDivisor;
+        stash_constants_.y = generate_random_uint32() % kPrimeDivisor;
        stash_count_ = 0;
        // Initialize memory.
@@ -205,8 +203,8 @@ bool HashTable::Build(const unsigned  n,
    // Copy out the stash size.
    CUDA_SAFE_CALL(cudaMemcpy( &stash_count_, d_stash_count, sizeof(unsigned), cudaMemcpyDeviceToHost ));
    if (stash_count_ && num_failures == 0) {
-        sprintf(buffer, "Stash size: %u", stash_count_);
+        // sprintf(buffer, "Stash size: %u", stash_count_);
-        PrintMessage(buffer, true);
+        // PrintMessage(buffer, true);
 #ifdef _DEBUG
        PrintStashContents(d_contents_ + table_size_);
@@ -226,7 +224,7 @@ bool HashTable::Build(const unsigned  n,
        sprintf(buffer, "Completely failed to build");
        PrintMessage(buffer, true);
    } else if (num_attempts > 1) {
-        sprintf(buffer, "Needed %u attempts to build", num_attempts);
+        sprintf(buffer, "Needed %u attempts to build, you can ignore this message.", num_attempts);
        PrintMessage(buffer, true);
    }

--- a/src/hash/hash_table.cu
+++ b/src/hash/hash_table.cu
@@ -14,14 +14,14 @@
 * @brief Hides all of the CUDA calls from the actual CPP file.
 */
-#include <hash/cuda_util.h>
+#include <cuhash/cuda_util.h>
-#include <hash/debugging.h>
+#include <cuhash/debugging.h>
-#include <hash/definitions.h>
+#include <cuhash/definitions.h>
-#include <hash/hash_table.cuh>
+#include <cuhash/hash_table.cuh>
 #include <cuda.h>
-namespace cudahash {
+namespace cuhash {
 namespace CUDAWrapper {
    void ClearTable(const unsigned  slots_in_table,

--- a/src/hash/main.cc
+++ b/src/hash/main.cc
-#include <hash/hash_table.h>
+#include <cuhash/hash_table.h>
 #include <cuda.h>
 int main(){
-    auto table = cudahash::HashTable();
+    auto table = cuhash::HashTable();
    table.Initialize(10, 2.0);
    const int N = 10;

--- a/src/hash/CMakeLists.txt
+++ b/src/hash/CMakeLists.txt
-add_library(cudahash SHARED hash_functions.cu hash_table.cpp hash_table.cu 
-                            mt19937ar.cpp)
-target_include_directories(cudahash PRIVATE ${ALL_INCLUDE} )
-set_property(TARGET cudahash PROPERTY CUDA_STANDARD 14)
-set_property(TARGET cudahash PROPERTY CXX_STANDARD 14)
-set_target_properties(cudahash PROPERTIES CUDA_SEPARABLE_COMPILATION ON)
-target_link_libraries(cudahash PRIVATE ${ALL_LIBS})
-install (TARGETS cudahash DESTINATION lib)
-add_executable(cudahash_test main.cc)
-target_include_directories(cudahash_test PRIVATE ${ALL_INCLUDE} )
-set_property(TARGET cudahash_test PROPERTY CUDA_STANDARD 14)
-set_property(TARGET cudahash_test PROPERTY CXX_STANDARD 14)
-set_target_properties(cudahash_test PROPERTIES CUDA_SEPARABLE_COMPILATION ON)
-target_link_libraries(cudahash_test PRIVATE ${ALL_LIBS} cudahash)
-install (TARGETS cudahash_test DESTINATION bin)
--- a/src/hash/mt19937ar.cpp
+++ b/src/hash/mt19937ar.cpp
-/*
-   A C-program for MT19937, with initialization improved 2002/1/26.
-   Coded by Takuji Nishimura and Makoto Matsumoto.
-   Before using, initialize the state by using init_genrand(seed)
-   or init_by_array(init_key, key_length).
-   Copyright (C) 1997 - 2002, Makoto Matsumoto and Takuji Nishimura,
-   All rights reserved.
-   Redistribution and use in source and binary forms, with or without
-   modification, are permitted provided that the following conditions
-   are met:
-     1. Redistributions of source code must retain the above copyright
-        notice, this list of conditions and the following disclaimer.
-     2. Redistributions in binary form must reproduce the above copyright
-        notice, this list of conditions and the following disclaimer in the
-        documentation and/or other materials provided with the distribution.
-     3. The names of its contributors may not be used to endorse or promote
-        products derived from this software without specific prior written
-        permission.
-   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-   A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
-   CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
-   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
-   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
-   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
-   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
-   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
-   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-   Any feedback is very welcome.
-   http://www.math.sci.hiroshima-u.ac.jp/~m-mat/MT/emt.html
-   email: m-mat @ math.sci.hiroshima-u.ac.jp (remove space)
-*/
-#include <stdio.h>
-/* Period parameters */
-#define N 624
-#define M 397
-#define MATRIX_A 0x9908b0dfUL   /* constant vector a */
-#define UPPER_MASK 0x80000000UL /* most significant w-r bits */
-#define LOWER_MASK 0x7fffffffUL /* least significant r bits */
-static unsigned long mt[N]; /* the array for the state vector  */
-static int mti=N+1; /* mti==N+1 means mt[N] is not initialized */
-/* initializes mt[N] with a seed */
-void init_genrand(unsigned long s)
-{
-    mt[0]= s & 0xffffffffUL;
-    for (mti=1; mti<N; mti++) {
-        mt[mti] =
-            (1812433253UL * (mt[mti-1] ^ (mt[mti-1] >> 30)) + mti);
-        /* See Knuth TAOCP Vol2. 3rd Ed. P.106 for multiplier. */
-        /* In the previous versions, MSBs of the seed affect   */
-        /* only MSBs of the array mt[].                        */
-        /* 2002/01/09 modified by Makoto Matsumoto             */
-        mt[mti] &= 0xffffffffUL;
-        /* for >32 bit machines */
-    }
-}
-/* initialize by an array with array-length */
-/* init_key is the array for initializing keys */
-/* key_length is its length */
-/* slight change for C++, 2004/2/26 */
-void init_by_array(unsigned long init_key[], int key_length)
-{
-    int i, j, k;
-    init_genrand(19650218UL);
-    i=1; j=0;
-    k = (N>key_length ? N : key_length);
-    for (; k; k--) {
-        mt[i] = (mt[i] ^ ((mt[i-1] ^ (mt[i-1] >> 30)) * 1664525UL))
-          + init_key[j] + j; /* non linear */
-        mt[i] &= 0xffffffffUL; /* for WORDSIZE > 32 machines */
-        i++; j++;
-        if (i>=N) { mt[0] = mt[N-1]; i=1; }
-        if (j>=key_length) j=0;
-    }
-    for (k=N-1; k; k--) {
-        mt[i] = (mt[i] ^ ((mt[i-1] ^ (mt[i-1] >> 30)) * 1566083941UL))
-          - i; /* non linear */
-        mt[i] &= 0xffffffffUL; /* for WORDSIZE > 32 machines */
-        i++;
-        if (i>=N) { mt[0] = mt[N-1]; i=1; }
-    }
-    mt[0] = 0x80000000UL; /* MSB is 1; assuring non-zero initial array */
-}
-/* generates a random number on [0,0xffffffff]-interval */
-unsigned long genrand_int32(void)
-{
-    unsigned long y;
-    static unsigned long mag01[2]={0x0UL, MATRIX_A};
-    /* mag01[x] = x * MATRIX_A  for x=0,1 */
-    if (mti >= N) { /* generate N words at one time */
-        int kk;
-        if (mti == N+1)   /* if init_genrand() has not been called, */
-            init_genrand(5489UL); /* a default initial seed is used */
-        for (kk=0;kk<N-M;kk++) {
-            y = (mt[kk]&UPPER_MASK)|(mt[kk+1]&LOWER_MASK);
-            mt[kk] = mt[kk+M] ^ (y >> 1) ^ mag01[y & 0x1UL];
-        }
-        for (;kk<N-1;kk++) {
-            y = (mt[kk]&UPPER_MASK)|(mt[kk+1]&LOWER_MASK);
-            mt[kk] = mt[kk+(M-N)] ^ (y >> 1) ^ mag01[y & 0x1UL];
-        }
-        y = (mt[N-1]&UPPER_MASK)|(mt[0]&LOWER_MASK);
-        mt[N-1] = mt[M-1] ^ (y >> 1) ^ mag01[y & 0x1UL];
-        mti = 0;
-    }
-    y = mt[mti++];
-    /* Tempering */
-    y ^= (y >> 11);
-    y ^= (y << 7) & 0x9d2c5680UL;
-    y ^= (y << 15) & 0xefc60000UL;
-    y ^= (y >> 18);
-    return y;
-}
-/* generates a random number on [0,0x7fffffff]-interval */
-long genrand_int31(void)
-{
-    return (long)(genrand_int32()>>1);
-}
-/* generates a random number on [0,1]-real-interval */
-double genrand_real1(void)
-{
-    return genrand_int32()*(1.0/4294967295.0);
-    /* divided by 2^32-1 */
-}
-/* generates a random number on [0,1)-real-interval */
-double genrand_real2(void)
-{
-    return genrand_int32()*(1.0/4294967296.0);
-    /* divided by 2^32 */
-}
-/* generates a random number on (0,1)-real-interval */
-double genrand_real3(void)
-{
-    return (((double)genrand_int32()) + 0.5)*(1.0/4294967296.0);
-    /* divided by 2^32 */
-}
-/* generates a random number on [0,1) with 53-bit resolution*/
-double genrand_res53(void)
-{
-    unsigned long a=genrand_int32()>>5, b=genrand_int32()>>6;
-    return(a*67108864.0+b)*(1.0/9007199254740992.0);
-}
-/* These real versions are due to Isaku Wada, 2002/01/09 added */
--- a/src/spconv/CMakeLists.txt
+++ b/src/spconv/CMakeLists.txt
-add_library(spconv SHARED all.cc indice.cc indice.cu 
+set(ALL_FILES all.cc indice.cc reordering.cc maxpool.cc nms.cc)
-            reordering.cc reordering.cu maxpool.cc maxpool.cu nms.cc
+if (SPCONV_BuildCUDA)
-            pillar_scatter.cu)
+    set(ALL_FILES ${ALL_FILES} indice.cu reordering.cu maxpool.cu pillar_scatter.cu)
+endif()
+add_library(spconv SHARED ${ALL_FILES})
 target_include_directories(spconv PRIVATE ${ALL_INCLUDE} )
 set_property(TARGET spconv PROPERTY CUDA_STANDARD 14)
 set_property(TARGET spconv PROPERTY CXX_STANDARD 14)
 set_target_properties(spconv PROPERTIES CUDA_SEPARABLE_COMPILATION ON)
-target_link_libraries(spconv PRIVATE ${ALL_LIBS} cudahash)
+if (SPCONV_BuildCUDA)
+    target_link_libraries(spconv PRIVATE ${ALL_LIBS} cuhash)
+else()
+    target_link_libraries(spconv PRIVATE ${ALL_LIBS})
+endif()
 install (TARGETS spconv DESTINATION lib)
--- a/src/spconv/all.cc
+++ b/src/spconv/all.cc
@@ -12,7 +12,6 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
-#include <cuda_runtime_api.h>
 #include <spconv/pool_ops.h>
 #include <spconv/spconv_ops.h>
 #include <spconv/pillar_scatter_ops.h>
@@ -35,9 +34,9 @@ static auto registry =
        .op("spconv::indice_maxpool_fp32", &spconv::indiceMaxPool<float>)
        .op("spconv::indice_maxpool_backward_fp32",
            &spconv::indiceMaxPoolBackward<float>)
-        // .op("spconv::indice_maxpool_half", &spconv::indiceMaxPool<at::Half>)
+        .op("spconv::indice_maxpool_half", &spconv::indiceMaxPool<at::Half>)
-        // .op("spconv::indice_maxpool_backward_half",
+        .op("spconv::indice_maxpool_backward_half",
-        //     &spconv::indiceMaxPoolBackward<at::Half>)
+            &spconv::indiceMaxPoolBackward<at::Half>)
        .op("spconv::nms", &spconv::nonMaxSuppression<float>)
        .op("spconv::pillar_scatter_float", &spconv::pointPillarScatter<float>)
        .op("spconv::pillar_scatter_half", &spconv::pointPillarScatter<at::Half>);
\ No newline at end of file
--- a/src/spconv/indice.cu
+++ b/src/spconv/indice.cu
@@ -22,7 +22,7 @@
 #include <tensorview/tensorview.h>
 #include <type_traits>
 #include <utility/timer.h>
-#include <hash/hash_table.h>
+#include <cuhash/hash_table.h>
 namespace spconv {
 namespace functor {
@@ -78,24 +78,28 @@ struct CreateConvIndicePairFunctorP2<tv::GPU, Index, IndexGrid, NDim> {
    auto numActIn = indicesIn.dim(0);
    if (numActIn == 0)
      return 0;
-    Index numAct = indicePairUnique.dim(0) - 1;
+    // After unique, indicePairUnique ends with a trailing std::numeric_limits<int>::max() entry, so that entry is excluded from the count.
+    Index numAct = indicePairUnique.dim(0) - 1; 
    if (useHash){
-      auto table = cudahash::HashTable();
+      auto table = cuhash::HashTable();
-      table.Initialize(numAct, 2.0);
+      // std::cout << "create " << numAct << " size table..." << std::endl;
-      Index *d_values = nullptr;
+      table.Initialize(numAct, 2.0, 4);
-      cudaMalloc((void**)&d_values, sizeof(Index) * numAct);
+      unsigned *d_values = nullptr;
+      cudaMalloc((void**)&d_values, sizeof(unsigned) * numAct);
      TV_CHECK_CUDA_ERR_V2("cudaMalloc failed");
-      arangeKernel<Index><<<tv::launch::getBlocks(numAct), tv::launch::CUDA_NUM_THREADS, 0,
+      arangeKernel<unsigned><<<tv::launch::getBlocks(numAct), tv::launch::CUDA_NUM_THREADS, 0,
            d.getStream()>>>(d_values, numAct);
      bool res = table.Build(numAct, reinterpret_cast<unsigned*>(indicePairUnique.data()), 
-                reinterpret_cast<unsigned*>(d_values));
+                d_values);
-      TV_ASSERT_RT_ERR(res, "err");
+      cudaFree(d_values);
+      if (!res){
+        return -1; //use -1 to tell outside use CPU implementation
+      }
      assignIndiceOutKernel<Index, NDim>
          <<<tv::launch::getBlocks(numAct), tv::launch::CUDA_NUM_THREADS, 0,
            d.getStream()>>>(indicesOut, numAct,
                          indicePairUnique, outSpatialShape, batchSize);
      TV_CHECK_CUDA_ERR_V2("assignGridAndIndiceOutKernel failed");
-      cudaFree(d_values);
      auto tableSize = table.get_table_size();
      auto tableData = table.data();
      auto constants = table.get_constants_4();
@@ -149,8 +153,9 @@ struct CreateSubMIndicePairFunctor<tv::GPU, Index, IndexGrid, NDim> {
      return 0;
    // auto timer = spconv::CudaContextTimer<>();
    if (useHash){
-      auto table = cudahash::HashTable();
+      auto table = cuhash::HashTable();
-      table.Initialize(numActIn, 2.0);
+      // std::cout << "subm create " << numActIn << " size table..." << std::endl;
+      table.Initialize(numActIn, 2.0, 4);
      unsigned *d_keyvalues = nullptr;
      cudaMalloc((void**)&d_keyvalues, sizeof(unsigned) * numActIn * 2);
      unsigned *d_values = d_keyvalues + numActIn;
@@ -160,8 +165,10 @@ struct CreateSubMIndicePairFunctor<tv::GPU, Index, IndexGrid, NDim> {
      TV_CHECK_CUDA_ERR_V2("prepareSubMHashKernel failed");
      bool res = table.Build(numActIn, reinterpret_cast<unsigned*>(d_keyvalues), 
                reinterpret_cast<unsigned*>(d_values));
-      TV_ASSERT_RT_ERR(res, "err");
      cudaFree(d_keyvalues);
+      if (!res){
+        return -1; //use -1 to tell outside use CPU implementation
+      }
      auto tableSize = table.get_table_size();
      auto tableData = table.data();
      auto constants = table.get_constants_4();