"wrappers/vscode:/vscode.git/clone" did not exist on "6f26c4c5fb39bb65531348e7e1245be49481a845"
Commit 01ed382c authored by yan.yan's avatar yan.yan
Browse files

working on tensor core test

parent 3517290c
// Copyright 2019-2020 Yan Yan
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <chrono>
#ifdef TV_CUDA
#include <cuda_runtime_api.h>
#endif
#include <iostream>
namespace tv {
#ifdef TV_CUDA
/**
 * Wall-clock interval timer for CUDA work.
 *
 * Every measurement point first calls cudaDeviceSynchronize() and then reads
 * std::chrono::steady_clock, so an interval covers the device work that was
 * submitted inside it.
 *
 * @tparam TimeT std::chrono duration type used to express the results.
 */
template <typename TimeT = std::chrono::microseconds> struct CudaContextTimer {
  /// Synchronizes the device and starts the first interval.
  CudaContextTimer() {
    cudaDeviceSynchronize();
    mCurTime = std::chrono::steady_clock::now();
  }

  /**
   * Synchronizes the device, returns the time elapsed since the previous
   * reset (construction or last report()) in units of TimeT, and starts a new
   * interval.
   */
  typename TimeT::rep report() {
    cudaDeviceSynchronize();
    // Read the clock exactly once: the same instant ends this interval and
    // starts the next one, so no time is lost between consecutive reports.
    const auto now = std::chrono::steady_clock::now();
    const auto res = std::chrono::duration_cast<TimeT>(now - mCurTime).count();
    mCurTime = now;
    return res;
  }

  /**
   * Runs `f` Count times and returns the mean duration (in units of TimeT) of
   * the iterations whose index is >= start; earlier iterations are treated as
   * warm-up and discarded.
   *
   * If start >= Count no iteration is measured and the result is 0 / 0.0,
   * i.e. not a finite number.
   *
   * @tparam Count number of times `f` is invoked.
   * @param f      callable taking no arguments.
   * @param start  index of the first iteration included in the mean.
   */
  template <int Count, typename F>
  double benchmark(F &&f, int start = int(Count) * 0.3) {
    auto res = typename TimeT::rep();
    int count = 0;
    // Synchronize AND reset the interval start; otherwise the first sample
    // would also contain the time elapsed since the timer was last reset.
    report();
    for (int i = 0; i < Count; ++i) {
      std::forward<F>(f)();
      auto time = report();
      if (i >= start) {
        res += time;
        count += 1;
      }
    }
    return res / double(count);
  }

private:
  // Start of the interval currently being measured.
  std::chrono::time_point<std::chrono::steady_clock> mCurTime;
};
#endif
/**
 * Wall-clock interval timer based on std::chrono::steady_clock.
 *
 * @tparam TimeT std::chrono duration type used to express the results.
 */
template <typename TimeT = std::chrono::microseconds> struct CPUTimer {
  /// Starts the first interval at construction time.
  CPUTimer() : mCurTime(std::chrono::steady_clock::now()) {}

  /**
   * Returns the time elapsed since the previous reset (construction or last
   * report()) in units of TimeT and starts a new interval.
   */
  typename TimeT::rep report() {
    // Read the clock exactly once: the same instant ends this interval and
    // starts the next one, so no time is lost between consecutive reports.
    const auto now = std::chrono::steady_clock::now();
    const auto res = std::chrono::duration_cast<TimeT>(now - mCurTime).count();
    mCurTime = now;
    return res;
  }

private:
  // Start of the interval currently being measured.
  std::chrono::time_point<std::chrono::steady_clock> mCurTime;
};
} // namespace tv
// Copyright 2019-2020 Yan Yan
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include "mp_helper.h"
#include <ATen/ATen.h>
#include <tensorview/tensor.h>
#include <tensorview/tensorview.h>
#include <torch/script.h>
#ifdef TV_CUDA
#include <ATen/cuda/CUDAContext.h>
#endif
namespace tv {
#ifdef TV_CUDA
struct TorchGPU : public tv::GPU {
virtual cudaStream_t getStream() const override {
return at::cuda::getCurrentCUDAStream();
}
};
#endif
namespace detail {
template <typename T> struct TypeToTorchDtypeTraits;
template <> struct TypeToTorchDtypeTraits<int32_t> {
static constexpr decltype(torch::kInt32) value = torch::kInt32;
};
template <> struct TypeToTorchDtypeTraits<int16_t> {
static constexpr decltype(torch::kInt32) value = torch::kInt16;
};
template <> struct TypeToTorchDtypeTraits<int8_t> {
static constexpr decltype(torch::kInt8) value = torch::kInt8;
};
template <> struct TypeToTorchDtypeTraits<int64_t> {
static constexpr decltype(torch::kInt32) value = torch::kInt64;
};
template <> struct TypeToTorchDtypeTraits<uint8_t> {
static constexpr decltype(torch::kInt32) value = torch::kUInt8;
};
template <> struct TypeToTorchDtypeTraits<bool> {
static constexpr decltype(torch::kInt32) value = torch::kBool;
};
template <> struct TypeToTorchDtypeTraits<float> {
static constexpr decltype(torch::kInt32) value = torch::kFloat32;
};
template <> struct TypeToTorchDtypeTraits<double> {
static constexpr decltype(torch::kInt32) value = torch::kFloat64;
};
template <> struct TypeToTorchDtypeTraits<at::Half> {
static constexpr decltype(torch::kInt32) value = torch::kHalf;
};
using all_torch_types_t = std::tuple<float, double, int8_t, int16_t, int32_t,
int64_t, uint8_t, bool, at::Half>;
} // namespace detail
template <typename T>
constexpr decltype(torch::kInt32) torch_type_v =
detail::TypeToTorchDtypeTraits<T>::value;
/**
 * Runtime-to-compile-time dtype dispatch.
 *
 * Walks the candidate types `Ts...`; for each candidate whose torch dtype
 * equals `t`, calls `f` with a value-initialized instance of that type (only
 * its type is meant to be used, via `decltype`). If no candidate matches, an
 * error listing the printable names of all candidates is raised through
 * TV_THROW_RT_ERR.
 *
 * @tparam Ts candidate element types (at least one is required).
 * @param t   dtype to dispatch on.
 * @param f   generic callable invoked as `f(Candidate())`.
 */
template <class... Ts, typename F>
void dispatch_torch(at::ScalarType t, F &&f) {
  static_assert(sizeof...(Ts) > 0, "you need to provide at least one type");
  using candidates_t = mp_list<Ts...>;
  bool matched = false;
  tv::mp_for_each<candidates_t>([=, &matched, &f](auto I) {
    using candidate_t = decltype(I);
    if (detail::TypeToTorchDtypeTraits<candidate_t>::value == t) {
      std::forward<F>(f)(candidate_t());
      matched = true;
    }
  });
  if (matched) {
    return;
  }
  // Nothing matched: collect the candidate names for the error message.
  std::stringstream available;
  tv::mp_for_each<candidates_t>([=, &available](auto I) {
    available << tv::detail::TypeToString<decltype(I)>::value << " ";
  });
  TV_THROW_RT_ERR("unknown type", t, ", available:", available.str());
}
/// Function-object form of dispatch_torch whose candidate types are taken
/// from the template arguments of a type list (e.g. std::tuple<Ts...>).
template <class T> struct DispatchTorch;

template <template <class...> class TypeList, class... Ts>
struct DispatchTorch<TypeList<Ts...>> {
  /// Forwards to dispatch_torch<Ts...>(t, f).
  template <typename F> void operator()(at::ScalarType t, F &&f) {
    dispatch_torch<Ts...>(t, std::forward<F>(f));
  }
};
/**
 * Asserts (TV_ASSERT_RT_ERR) that the element type of `tensor` is `T`,
 * ignoring cv-qualifiers on `T`. The tensor dtype is resolved to a C++ type
 * through DispatchTorch over detail::all_torch_types_t.
 */
template <typename T> void check_torch_dtype(const torch::Tensor &tensor) {
  using expected_t = std::remove_cv_t<T>;
  auto verify = [&](auto I) {
    constexpr bool val = std::is_same<expected_t, decltype(I)>::value;
    TV_ASSERT_RT_ERR(val, "error");
  };
  DispatchTorch<detail::all_torch_types_t>()(tensor.scalar_type(), verify);
}
/**
 * Wraps the data of a torch tensor in a TensorView without copying.
 *
 * The element type is checked with check_torch_dtype<T>; when Rank > 0 the
 * tensor's number of dimensions must equal Rank. Only the sizes are carried
 * over to the view - strides are not (see TODO below).
 */
template <typename T, int Rank = -1,
          template <class> class PtrTraits = DefaultPtrTraits,
          typename Tindex = int>
TensorView<T, Rank, PtrTraits, Tindex> torch2tv(const torch::Tensor &tensor) {
  using view_t = TensorView<T, Rank, PtrTraits, Tindex>;
  using tv_shape_t = typename view_t::tv_shape_t;
  check_torch_dtype<T>(tensor);
  // TODO stride
  if (Rank > 0) {
    TV_ASSERT_INVALID_ARG(tensor.dim() == Rank, "error");
  }
  tv_shape_t shape;
  const auto dims = tensor.sizes();
  for (auto dim : dims) {
    shape.push_back(dim);
  }
  return view_t(tensor.data_ptr<std::remove_const_t<T>>(), shape);
}
/**
 * Returns a tensor that aliases rows [start, end) of `tensor` along axis 0.
 *
 * The result is built with torch::from_blob on top of `tensor`'s data pointer,
 * keeping the sizes of all other axes, the strides and the options of the
 * input.
 * NOTE(review): from_blob is not given a deleter here, so presumably the
 * result does not keep the source storage alive - confirm that callers keep
 * `tensor` alive while the slice is in use.
 *
 * @param tensor tensor with at least one dimension.
 * @param start  first index along axis 0 (inclusive), 0 <= start <= end.
 * @param end    last index along axis 0 (exclusive), end <= tensor.size(0).
 */
template <typename T>
torch::Tensor torch_slice_first_axis(torch::Tensor tensor, T start, T end) {
  // only torch >= 1.5 have tensor slice.
  // A 0-dim tensor has no first axis; shape[0] below would be out of bounds.
  TV_ASSERT_INVALID_ARG(tensor.dim() > 0, "error");
  const int64_t first = static_cast<int64_t>(start);
  const int64_t last = static_cast<int64_t>(end);
  // Reject ranges that would move the data pointer outside the tensor.
  TV_ASSERT_INVALID_ARG(first >= 0 && first <= last && last <= tensor.size(0),
                        "error");
  const auto tensor_shape = tensor.sizes();
  std::vector<int64_t> shape(tensor_shape.begin(), tensor_shape.end());
  shape[0] = last - first;
  uint8_t *ptr = reinterpret_cast<uint8_t *>(tensor.data_ptr());
  // Pass the original strides so a non-contiguous input is not silently
  // reinterpreted as a contiguous one.
  return torch::from_blob(ptr + first * tensor.stride(0) * tensor.itemsize(),
                          torch::IntArrayRef(shape), tensor.strides(),
                          tensor.options());
}
namespace detail {
/// Printable name used for at::Half (e.g. in dispatch error messages).
template <> struct TypeToString<at::Half> {
  static constexpr auto value = "half";
};
} // namespace detail
} // namespace tv
\ No newline at end of file
// Copyright 2019-2020 Yan Yan
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <tensorview/mp_helper.h>
#include <tensorview/tensorview.h>
#include <ATen/ATen.h>
#include <torch/script.h>
#ifdef TV_CUDA
#include <ATen/cuda/CUDAContext.h>
#endif
namespace tv {
#ifdef TV_CUDA
struct TorchGPU : public tv::GPU {
virtual cudaStream_t getStream() const override {
return at::cuda::getCurrentCUDAStream();
}
};
#endif
/**
 * Asserts (TV_ASSERT_RT_ERR) that the element type of `tensor` is `T`,
 * ignoring a top-level const on `T`.
 *
 * Supported dtypes: Double, Float, Int, Half and Long; any other dtype fails
 * the assertion.
 */
template <typename T> void check_torch_dtype(const torch::Tensor &tensor) {
  using value_t = std::remove_const_t<T>;
  bool val = false;
  switch (tensor.scalar_type()) {
  case at::ScalarType::Double:
    val = std::is_same<value_t, double>::value;
    break;
  case at::ScalarType::Float:
    val = std::is_same<value_t, float>::value;
    break;
  case at::ScalarType::Int:
    val = std::is_same<value_t, int>::value;
    break;
  case at::ScalarType::Half:
    val = std::is_same<value_t, at::Half>::value;
    break;
  case at::ScalarType::Long:
    // Long tensors hold 64-bit integers. `long` is only 32 bits wide on
    // LLP64 platforms and is a different type from int64_t where int64_t is
    // `long long`, so compare against int64_t (the type this file already
    // maps to torch::kInt64).
    val = std::is_same<value_t, int64_t>::value;
    break;
  default:
    TV_ASSERT_RT_ERR(false, "error");
  }
  TV_ASSERT_RT_ERR(val, "error");
}
namespace detail {
template <typename T> struct TypeToTorchDtypeTraits;
template <> struct TypeToTorchDtypeTraits<int32_t> {
static constexpr decltype(torch::kInt32) value = torch::kInt32;
};
template <> struct TypeToTorchDtypeTraits<int64_t> {
static constexpr decltype(torch::kInt32) value = torch::kInt64;
};
template <> struct TypeToTorchDtypeTraits<float> {
static constexpr decltype(torch::kInt32) value = torch::kFloat32;
};
template <> struct TypeToTorchDtypeTraits<double> {
static constexpr decltype(torch::kInt32) value = torch::kFloat64;
};
template <> struct TypeToTorchDtypeTraits<at::Half> {
static constexpr decltype(torch::kInt32) value = torch::kHalf;
};
} // namespace detail
template <typename T>
constexpr decltype(torch::kInt32) torch_type_v =
detail::TypeToTorchDtypeTraits<T>::value;
/**
 * Wraps the data of a torch tensor in a tv::TensorView<T> without copying.
 * The element type is checked with check_torch_dtype<T>; the view receives
 * the tensor's sizes as its shape.
 */
template <typename T> tv::TensorView<T> torch2tv(const torch::Tensor &tensor) {
  check_torch_dtype<T>(tensor);
  tv::Shape shape;
  const auto dims = tensor.sizes();
  for (auto dim : dims) {
    shape.push_back(dim);
  }
  using mutable_t = std::remove_const_t<T>;
  return tv::TensorView<T>(tensor.data_ptr<mutable_t>(), shape);
}
namespace detail {
/// Printable name used for at::Half (e.g. in dispatch error messages).
template <> struct TypeToString<at::Half> {
  static constexpr auto value = "half";
};
} // namespace detail
/**
 * Runtime-to-compile-time dtype dispatch.
 *
 * Walks the candidate types `Ts...`; for each candidate whose torch dtype
 * (torch_type_v) equals `t`, calls `f` with a value-initialized instance of
 * that type. If no candidate matches, an error listing the printable names of
 * all candidates is raised through TV_THROW_RT_ERR.
 *
 * @tparam Ts candidate element types (at least one is required).
 * @param t   dtype to dispatch on.
 * @param f   generic callable invoked as `f(Candidate())`.
 */
template <class... Ts, typename F>
void dispatch_torch(at::ScalarType t, F &&f) {
  static_assert(sizeof...(Ts) > 0, "you need to provide at least one type");
  using candidates_t = spconv::mp_list<Ts...>;
  bool matched = false;
  spconv::tv::mp_for_each<candidates_t>([=, &matched, &f](auto I) {
    using candidate_t = decltype(I);
    if (torch_type_v<candidate_t> == t) {
      std::forward<F>(f)(candidate_t());
      matched = true;
    }
  });
  if (matched) {
    return;
  }
  // Nothing matched: collect the candidate names for the error message.
  std::stringstream available;
  spconv::tv::mp_for_each<candidates_t>([=, &available](auto I) {
    available << tv::detail::TypeToString<decltype(I)>::value << " ";
  });
  TV_THROW_RT_ERR("unknown type", t, ", available: ", available.str());
}
} // namespace tv
\ No newline at end of file
/**
* MIT License
*
* Copyright (c) 2017 Tessil
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#ifndef TSL_ROBIN_GROWTH_POLICY_H
#define TSL_ROBIN_GROWTH_POLICY_H
#include <algorithm>
#include <array>
#include <climits>
#include <cmath>
#include <cstddef>
#include <iterator>
#include <limits>
#include <ratio>
#include <stdexcept>
// Internal assertion macro: forwards to assert() when TSL_DEBUG is defined,
// otherwise expands to a no-op expression that does not evaluate `expr`.
#if defined(TSL_DEBUG)
#define tsl_rh_assert(expr) assert(expr)
#else
#define tsl_rh_assert(expr) (static_cast<void>(0))
#endif
/**
 * If exceptions are enabled, throw the exception `ex` constructed from `msg`.
 * Otherwise call std::terminate; unless NDEBUG is defined, `msg` is first
 * written to stderr.
 */
#if (defined(__cpp_exceptions) || defined(__EXCEPTIONS) || \
     (defined(_MSC_VER) && defined(_CPPUNWIND))) && \
    !defined(TSL_NO_EXCEPTIONS)
#define TSL_RH_THROW_OR_TERMINATE(ex, msg) throw ex(msg)
#else
// std::terminate is declared in <exception>.
#include <exception>
#ifdef NDEBUG
#define TSL_RH_THROW_OR_TERMINATE(ex, msg) std::terminate()
#else
#include <cstdio>
// Print through "%s" so that `msg` is never interpreted as a format string.
#define TSL_RH_THROW_OR_TERMINATE(ex, msg) \
  do { \
    std::fprintf(stderr, "%s\n", msg); \
    std::terminate(); \
  } while (0)
#endif
#endif
// Marks `exp` as expected to be true. Uses __builtin_expect on GCC/Clang and
// is a plain pass-through on every other compiler.
#if !defined(__GNUC__) && !defined(__clang__)
#define TSL_RH_LIKELY(exp) (exp)
#else
#define TSL_RH_LIKELY(exp) (__builtin_expect(!!(exp), true))
#endif
namespace tsl {
namespace rh {
/**
 * Growth policy that keeps the bucket count a power of two and multiplies it
 * by GrowthFactor on each growth. A hash is mapped to a bucket with a bit
 * mask instead of a modulo.
 *
 * GrowthFactor must be a power of two >= 2 (enforced by a static_assert).
 */
template <std::size_t GrowthFactor> class power_of_two_growth_policy {
public:
  /**
   * Called on hash table creation and on rehash with the minimum number of
   * buckets required. The value is rounded up in place to the next power of
   * two (never lowered).
   *
   * A request of 0 stays 0, and bucket_for_hash then always returns 0.
   * Requests above max_bucket_count() raise std::length_error through
   * TSL_RH_THROW_OR_TERMINATE.
   */
  explicit power_of_two_growth_policy(std::size_t &min_bucket_count_in_out) {
    if (min_bucket_count_in_out > max_bucket_count()) {
      TSL_RH_THROW_OR_TERMINATE(std::length_error,
                                "The hash table exceeds its maxmimum size.");
    }

    if (min_bucket_count_in_out == 0) {
      m_mask = 0;
      return;
    }

    min_bucket_count_in_out = round_up_to_power_of_two(min_bucket_count_in_out);
    m_mask = min_bucket_count_in_out - 1;
  }

  /**
   * Bucket index in [0, bucket_count()) for `hash`; always 0 when the bucket
   * count is 0.
   */
  std::size_t bucket_for_hash(std::size_t hash) const noexcept {
    return hash & m_mask;
  }

  /**
   * Bucket count to use on the next growth: the current count times
   * GrowthFactor. Raises std::length_error through TSL_RH_THROW_OR_TERMINATE
   * if that would exceed max_bucket_count().
   */
  std::size_t next_bucket_count() const {
    const std::size_t current_count = m_mask + 1;
    if (current_count > max_bucket_count() / GrowthFactor) {
      TSL_RH_THROW_OR_TERMINATE(std::length_error,
                                "The hash table exceeds its maxmimum size.");
    }
    return current_count * GrowthFactor;
  }

  /**
   * Largest bucket count supported: the largest power of two representable
   * in std::size_t.
   */
  std::size_t max_bucket_count() const {
    return (std::numeric_limits<std::size_t>::max() / 2) + 1;
  }

  /**
   * Resets the policy as if it had been created with a bucket count of 0;
   * bucket_for_hash returns 0 afterwards.
   */
  void clear() noexcept { m_mask = 0; }

private:
  /// Smallest power of two >= value (1 for value == 0).
  static std::size_t round_up_to_power_of_two(std::size_t value) {
    if (is_power_of_two(value)) {
      return value;
    }
    if (value == 0) {
      return 1;
    }

    // Smear the highest set bit of (value - 1) into every lower position,
    // then add one to reach the next power of two.
    --value;
    const std::size_t bit_width = sizeof(std::size_t) * CHAR_BIT;
    for (std::size_t shift = 1; shift < bit_width; shift *= 2) {
      value |= value >> shift;
    }
    return value + 1;
  }

  /// True when exactly one bit of `value` is set.
  static constexpr bool is_power_of_two(std::size_t value) {
    return value != 0 && (value & (value - 1)) == 0;
  }

protected:
  static_assert(is_power_of_two(GrowthFactor) && GrowthFactor >= 2,
                "GrowthFactor must be a power of two >= 2.");

  // bucket_count - 1, or 0 when there are no buckets.
  std::size_t m_mask;
};
/**
 * Growth policy that multiplies the bucket count by
 * GrowthFactor::num / GrowthFactor::den on each growth and maps a hash to a
 * bucket with a modulo. The modulo is slower than a mask, but the policy
 * allows a growth factor smaller than two.
 */
template <class GrowthFactor = std::ratio<3, 2>> class mod_growth_policy {
public:
  /**
   * Uses `min_bucket_count_in_out` as bucket count without changing it. A
   * request of 0 is handled by using a modulus of 1 internally, so that
   * bucket_for_hash always returns 0. Requests above max_bucket_count() raise
   * std::length_error through TSL_RH_THROW_OR_TERMINATE.
   */
  explicit mod_growth_policy(std::size_t &min_bucket_count_in_out) {
    if (min_bucket_count_in_out > max_bucket_count()) {
      TSL_RH_THROW_OR_TERMINATE(std::length_error,
                                "The hash table exceeds its maxmimum size.");
    }
    m_mod = (min_bucket_count_in_out > 0) ? min_bucket_count_in_out : 1;
  }

  /// Bucket index for `hash`.
  std::size_t bucket_for_hash(std::size_t hash) const noexcept {
    return hash % m_mod;
  }

  /**
   * Bucket count to use on the next growth: the current count times the
   * growth factor, rounded up and capped at max_bucket_count(). Raises
   * std::length_error through TSL_RH_THROW_OR_TERMINATE when the table is
   * already at its maximum size or the computed size is not a normal
   * floating-point value.
   */
  std::size_t next_bucket_count() const {
    if (m_mod == max_bucket_count()) {
      TSL_RH_THROW_OR_TERMINATE(std::length_error,
                                "The hash table exceeds its maxmimum size.");
    }

    const double grown_count =
        std::ceil(double(m_mod) * REHASH_SIZE_MULTIPLICATION_FACTOR);
    if (!std::isnormal(grown_count)) {
      TSL_RH_THROW_OR_TERMINATE(std::length_error,
                                "The hash table exceeds its maxmimum size.");
    }

    if (grown_count > double(max_bucket_count())) {
      return max_bucket_count();
    }
    return std::size_t(grown_count);
  }

  /// Largest bucket count supported by the policy.
  std::size_t max_bucket_count() const { return MAX_BUCKET_COUNT; }

  /// Resets the policy as if it had been created with a bucket count of 0.
  void clear() noexcept { m_mod = 1; }

private:
  // Growth factor as a floating-point value.
  static constexpr double REHASH_SIZE_MULTIPLICATION_FACTOR =
      1.0 * GrowthFactor::num / GrowthFactor::den;
  // Largest std::size_t divided by the growth factor.
  static const std::size_t MAX_BUCKET_COUNT =
      std::size_t(double(std::numeric_limits<std::size_t>::max() /
                         REHASH_SIZE_MULTIPLICATION_FACTOR));

  static_assert(REHASH_SIZE_MULTIPLICATION_FACTOR >= 1.1,
                "Growth factor should be >= 1.1.");

  // Current modulus: the bucket count, or 1 when there are no buckets.
  std::size_t m_mod;
};
namespace detail {

// Bucket counts usable by prime_growth_policy, in increasing order. The first
// entry is 1 so that an index of 0 maps every hash to bucket 0.
static constexpr std::array<std::size_t, 40> PRIMES = {{
    1ul,         5ul,         17ul,         29ul,         37ul,
    53ul,        67ul,        79ul,         97ul,         131ul,
    193ul,       257ul,       389ul,        521ul,        769ul,
    1031ul,      1543ul,      2053ul,       3079ul,       6151ul,
    12289ul,     24593ul,     49157ul,      98317ul,      196613ul,
    393241ul,    786433ul,    1572869ul,    3145739ul,    6291469ul,
    12582917ul,  25165843ul,  50331653ul,   100663319ul,  201326611ul,
    402653189ul, 805306457ul, 1610612741ul, 3221225473ul, 4294967291ul}};

// Returns hash % PRIMES[IPrime]; the divisor is a compile-time constant.
template <unsigned int IPrime>
static constexpr std::size_t mod(std::size_t hash) {
  return hash % PRIMES[IPrime];
}

// MOD_PRIME[iprime](hash) returns hash % PRIMES[iprime]. Going through this
// table lets the compiler generate a dedicated, faster modulo for each
// constant divisor.
static constexpr std::array<std::size_t (*)(std::size_t), 40> MOD_PRIME = {{
    &mod<0>,  &mod<1>,  &mod<2>,  &mod<3>,  &mod<4>,  &mod<5>,  &mod<6>,
    &mod<7>,  &mod<8>,  &mod<9>,  &mod<10>, &mod<11>, &mod<12>, &mod<13>,
    &mod<14>, &mod<15>, &mod<16>, &mod<17>, &mod<18>, &mod<19>, &mod<20>,
    &mod<21>, &mod<22>, &mod<23>, &mod<24>, &mod<25>, &mod<26>, &mod<27>,
    &mod<28>, &mod<29>, &mod<30>, &mod<31>, &mod<32>, &mod<33>, &mod<34>,
    &mod<35>, &mod<36>, &mod<37>, &mod<38>, &mod<39>}};

} // namespace detail
/**
 * Growth policy that uses the numbers of detail::PRIMES as bucket counts.
 * Slower than tsl::rh::power_of_two_growth_policy in general, but it will
 * probably distribute the values better across the buckets when the hash
 * function is poor.
 *
 * To let the compiler optimize the modulo, the operation goes through a
 * lookup table of functions that each use a constant divisor. Written as a
 * switch, the code would look like:
 * \code
 * switch(iprime) { // iprime is the index of the current prime
 *     case 0: hash % 1ul;
 *             break;
 *     case 1: hash % 5ul;
 *             break;
 *     case 2: hash % 17ul;
 *             break;
 *     ...
 * }
 * \endcode
 *
 * Because the divisor is a constant, the compiler can replace the division by
 * a series of multiplications, subtractions and shifts. For example
 * 'hash % 5' could become something like
 * 'hash - ((hash * 0xCCCCCCCD) >> 34) * 5' in a 64-bit environment.
 */
class prime_growth_policy {
public:
  /**
   * Selects the smallest entry of detail::PRIMES that is >=
   * `min_bucket_count_in_out` and writes it back, except that a request of 0
   * stays 0. Raises std::length_error through TSL_RH_THROW_OR_TERMINATE when
   * no entry is large enough.
   */
  explicit prime_growth_policy(std::size_t &min_bucket_count_in_out) {
    const auto primes_begin = detail::PRIMES.begin();
    const auto primes_end = detail::PRIMES.end();

    const auto it_prime =
        std::lower_bound(primes_begin, primes_end, min_bucket_count_in_out);
    if (it_prime == primes_end) {
      TSL_RH_THROW_OR_TERMINATE(std::length_error,
                                "The hash table exceeds its maxmimum size.");
    }

    m_iprime = static_cast<unsigned int>(std::distance(primes_begin, it_prime));
    min_bucket_count_in_out = (min_bucket_count_in_out > 0) ? *it_prime : 0;
  }

  /// Bucket index for `hash`: hash modulo the current prime.
  std::size_t bucket_for_hash(std::size_t hash) const noexcept {
    return detail::MOD_PRIME[m_iprime](hash);
  }

  /**
   * Next entry of detail::PRIMES. Raises std::length_error through
   * TSL_RH_THROW_OR_TERMINATE when the last entry is already in use.
   */
  std::size_t next_bucket_count() const {
    const auto next_index = m_iprime + 1;
    if (next_index >= detail::PRIMES.size()) {
      TSL_RH_THROW_OR_TERMINATE(std::length_error,
                                "The hash table exceeds its maxmimum size.");
    }
    return detail::PRIMES[next_index];
  }

  /// Largest bucket count supported: the last entry of detail::PRIMES.
  std::size_t max_bucket_count() const { return detail::PRIMES.back(); }

  /// Resets the policy to the first entry of detail::PRIMES.
  void clear() noexcept { m_iprime = 0; }

private:
  // Index of the current prime in detail::PRIMES / detail::MOD_PRIME.
  unsigned int m_iprime;

  static_assert(std::numeric_limits<decltype(m_iprime)>::max() >=
                    detail::PRIMES.size(),
                "The type of m_iprime is not big enough.");
};
} // namespace rh
} // namespace tsl
#endif
/**
* MIT License
*
* Copyright (c) 2017 Tessil
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#ifndef TSL_ROBIN_HASH_H
#define TSL_ROBIN_HASH_H
#include "robin_growth_policy.h"
#include <algorithm>
#include <cassert>
#include <cmath>
#include <cstddef>
#include <cstdint>
#include <exception>
#include <iterator>
#include <limits>
#include <memory>
#include <stdexcept>
#include <tuple>
#include <type_traits>
#include <utility>
#include <vector>
namespace tsl {
namespace detail_robin_hash {
// Maps any type to void; used for SFINAE detection below.
template <typename T> struct make_void { typedef void type; };

// True when T declares a nested type named `is_transparent`.
template <typename T, typename = void>
struct has_is_transparent : std::integral_constant<bool, false> {};

template <typename T>
struct has_is_transparent<T,
                          typename make_void<typename T::is_transparent>::type>
    : std::integral_constant<bool, true> {};
// True only for instantiations of tsl::rh::power_of_two_growth_policy.
template <typename Policy>
struct is_power_of_two_policy : std::integral_constant<bool, false> {};

template <std::size_t Factor>
struct is_power_of_two_policy<tsl::rh::power_of_two_growth_policy<Factor>>
    : std::integral_constant<bool, true> {};
// Stand-in for std::clamp, which is only available from C++17 while this
// header must stay compatible with C++11. Returns a reference to `lo`, `v`
// or `hi`.
template <class T> const T &clamp(const T &v, const T &lo, const T &hi) {
  const T &raised = (lo < v) ? v : lo;
  return (raised < hi) ? raised : hi;
}
typedef std::uint_least32_t truncated_hash_type;

/**
 * Base class of a bucket entry: holds a truncated copy of the hash when
 * StoreHash is true and holds nothing otherwise.
 *
 * Primary template (no stored hash): every hash compares equal, the reported
 * truncated hash is 0 and set_hash does nothing.
 */
template <bool StoreHash> class bucket_entry_hash {
public:
  bool bucket_hash_equal(std::size_t /*hash*/) const noexcept { return true; }

  truncated_hash_type truncated_hash() const noexcept { return 0; }

protected:
  void set_hash(truncated_hash_type /*hash*/) noexcept {}
};

/**
 * Specialization that stores the truncated hash.
 */
template <> class bucket_entry_hash<true> {
public:
  // Compares the stored value with `hash` truncated to truncated_hash_type.
  bool bucket_hash_equal(std::size_t hash) const noexcept {
    return static_cast<truncated_hash_type>(hash) == m_hash;
  }

  truncated_hash_type truncated_hash() const noexcept { return m_hash; }

protected:
  void set_hash(truncated_hash_type hash) noexcept { m_hash = hash; }

private:
  // Not initialized on construction; holds a meaningful value only after
  // set_hash has been called.
  truncated_hash_type m_hash;
};
/**
 * Each bucket entry has:
 * - A value of type `ValueType`.
 * - An integer to store how far the value of the bucket, if any, is from its
 * ideal bucket (ex: if the current bucket 5 has the value 'foo' and
 * `hash('foo') % nb_buckets` == 3, `dist_from_ideal_bucket()` will return 2 as
 * the current value of the bucket is two buckets away from its ideal bucket).
 * If there is no value in the bucket (i.e. `empty()` is true),
 * `dist_from_ideal_bucket()` will be < 0.
 * - A marker which tells us if the bucket is the last bucket of the bucket
 * array (useful for the iterator of the hash table).
 * - If `StoreHash` is true, 32 bits of the hash of the value, if any, are also
 * stored in the bucket. If the size of the hash is more than 32 bits, it is
 * truncated. We don't store the full hash as storing the hash is a potential
 * opportunity to use the unused space due to the alignment of the bucket_entry
 * structure. We can thus potentially store the hash without any extra space
 * (which would not be possible with 64 bits of the hash).
 *
 * The value lives in raw aligned storage: it is created with placement new
 * when the bucket is filled and destroyed explicitly when the bucket is
 * cleared or destructed.
 */
template <typename ValueType, bool StoreHash>
class bucket_entry : public bucket_entry_hash<StoreHash> {
using bucket_hash = bucket_entry_hash<StoreHash>;
public:
using value_type = ValueType;
using distance_type = std::int_least16_t;
// Creates an empty bucket that is not marked as the last bucket.
bucket_entry() noexcept
: bucket_hash(),
m_dist_from_ideal_bucket(EMPTY_MARKER_DIST_FROM_IDEAL_BUCKET),
m_last_bucket(false) {
tsl_rh_assert(empty());
}
// Creates an empty bucket with the given last-bucket marker.
bucket_entry(bool last_bucket) noexcept
: bucket_hash(),
m_dist_from_ideal_bucket(EMPTY_MARKER_DIST_FROM_IDEAL_BUCKET),
m_last_bucket(last_bucket) {
tsl_rh_assert(empty());
}
// Copy constructor: copies the hash base and the last-bucket marker. The
// bucket starts out empty; the value is copy-constructed in place and the
// distance copied only when `other` holds a value.
bucket_entry(const bucket_entry &other) noexcept(
std::is_nothrow_copy_constructible<value_type>::value)
: bucket_hash(other),
m_dist_from_ideal_bucket(EMPTY_MARKER_DIST_FROM_IDEAL_BUCKET),
m_last_bucket(other.m_last_bucket) {
if (!other.empty()) {
::new (static_cast<void *>(std::addressof(m_value)))
value_type(other.value());
// Mark the bucket as occupied only once the value has been constructed.
m_dist_from_ideal_bucket = other.m_dist_from_ideal_bucket;
}
}
/**
 * Never really used, but still necessary as we must call resize on an empty
 * `std::vector<bucket_entry>`, and we need to support move-only types. See
 * robin_hash constructor for details.
 *
 * Move-constructs the value from `other` when it holds one; `other` itself
 * is not cleared here.
 */
bucket_entry(bucket_entry &&other) noexcept(
std::is_nothrow_move_constructible<value_type>::value)
: bucket_hash(std::move(other)),
m_dist_from_ideal_bucket(EMPTY_MARKER_DIST_FROM_IDEAL_BUCKET),
m_last_bucket(other.m_last_bucket) {
if (!other.empty()) {
::new (static_cast<void *>(std::addressof(m_value)))
value_type(std::move(other.value()));
m_dist_from_ideal_bucket = other.m_dist_from_ideal_bucket;
}
}
// Copy assignment: destroys the current value (if any), then copies the
// hash, the value (if `other` holds one), the distance and the last-bucket
// marker from `other`. Self-assignment is a no-op.
bucket_entry &operator=(const bucket_entry &other) noexcept(
std::is_nothrow_copy_constructible<value_type>::value) {
if (this != &other) {
// clear() leaves this bucket marked empty until the distance is copied
// below, after the new value has been constructed.
clear();
bucket_hash::operator=(other);
if (!other.empty()) {
::new (static_cast<void *>(std::addressof(m_value)))
value_type(other.value());
}
m_dist_from_ideal_bucket = other.m_dist_from_ideal_bucket;
m_last_bucket = other.m_last_bucket;
}
return *this;
}
// Move assignment is intentionally not available.
bucket_entry &operator=(bucket_entry &&) = delete;
// Destroys the stored value, if any.
~bucket_entry() noexcept { clear(); }
// Destroys the stored value (if any) and marks the bucket as empty. The
// last-bucket marker is left untouched.
void clear() noexcept {
if (!empty()) {
destroy_value();
m_dist_from_ideal_bucket = EMPTY_MARKER_DIST_FROM_IDEAL_BUCKET;
}
}
// True when the bucket holds no value.
bool empty() const noexcept {
return m_dist_from_ideal_bucket == EMPTY_MARKER_DIST_FROM_IDEAL_BUCKET;
}
// Access to the stored value; the bucket must not be empty.
value_type &value() noexcept {
tsl_rh_assert(!empty());
return *reinterpret_cast<value_type *>(std::addressof(m_value));
}
const value_type &value() const noexcept {
tsl_rh_assert(!empty());
return *reinterpret_cast<const value_type *>(std::addressof(m_value));
}
// Distance of the stored value from its ideal bucket; negative when the
// bucket is empty.
distance_type dist_from_ideal_bucket() const noexcept {
return m_dist_from_ideal_bucket;
}
bool last_bucket() const noexcept { return m_last_bucket; }
void set_as_last_bucket() noexcept { m_last_bucket = true; }
// Fills an empty bucket: constructs the value in place from
// `value_type_args`, stores `hash` (a no-op when the hash is not stored) and
// records the distance from the ideal bucket, which must be >= 0.
template <typename... Args>
void set_value_of_empty_bucket(distance_type dist_from_ideal_bucket,
truncated_hash_type hash,
Args &&... value_type_args) {
tsl_rh_assert(dist_from_ideal_bucket >= 0);
tsl_rh_assert(empty());
::new (static_cast<void *>(std::addressof(m_value)))
value_type(std::forward<Args>(value_type_args)...);
this->set_hash(hash);
m_dist_from_ideal_bucket = dist_from_ideal_bucket;
tsl_rh_assert(!empty());
}
// Exchanges the content of this non-empty bucket (value, distance and, when
// StoreHash is true, truncated hash) with the values passed by reference.
void swap_with_value_in_bucket(distance_type &dist_from_ideal_bucket,
truncated_hash_type &hash, value_type &value) {
tsl_rh_assert(!empty());
using std::swap;
swap(value, this->value());
swap(dist_from_ideal_bucket, m_dist_from_ideal_bucket);
// Avoid warning of unused variable if StoreHash is false
(void)hash;
if (StoreHash) {
const truncated_hash_type tmp_hash = this->truncated_hash();
this->set_hash(hash);
hash = tmp_hash;
}
}
// Converts a full hash to the truncated type stored in the bucket.
static truncated_hash_type truncate_hash(std::size_t hash) noexcept {
return truncated_hash_type(hash);
}
private:
// Runs the destructor of the stored value without changing the empty marker.
void destroy_value() noexcept {
tsl_rh_assert(!empty());
value().~value_type();
}
private:
// Raw storage with the size and alignment of value_type.
using storage = typename std::aligned_storage<sizeof(value_type),
alignof(value_type)>::type;
// Distance value that marks a bucket as empty.
static const distance_type EMPTY_MARKER_DIST_FROM_IDEAL_BUCKET = -1;
distance_type m_dist_from_ideal_bucket;
bool m_last_bucket;
storage m_value;
};
/**
* Internal common class used by `robin_map` and `robin_set`.
*
* ValueType is what will be stored by `robin_hash` (usually `std::pair<Key, T>`
* for map and `Key` for set).
*
* `KeySelect` should be a `FunctionObject` which takes a `ValueType` in
* parameter and returns a reference to the key.
*
* `ValueSelect` should be a `FunctionObject` which takes a `ValueType` in
* parameter and returns a reference to the value. `ValueSelect` should be void
* if there is no value (in a set for example).
*
* The strong exception guarantee only holds if the expression
* `std::is_nothrow_swappable<ValueType>::value &&
* std::is_nothrow_move_constructible<ValueType>::value` is true.
*
* Behaviour is undefined if the destructor of `ValueType` throws.
*/
template <class ValueType, class KeySelect, class ValueSelect, class Hash,
class KeyEqual, class Allocator, bool StoreHash, class GrowthPolicy>
class robin_hash : private Hash, private KeyEqual, private GrowthPolicy {
private:
template <typename U>
using has_mapped_type =
typename std::integral_constant<bool, !std::is_same<U, void>::value>;
static_assert(
noexcept(std::declval<GrowthPolicy>().bucket_for_hash(std::size_t(0))),
"GrowthPolicy::bucket_for_hash must be noexcept.");
static_assert(noexcept(std::declval<GrowthPolicy>().clear()),
"GrowthPolicy::clear must be noexcept.");
public:
template <bool IsConst> class robin_iterator;
using key_type = typename KeySelect::key_type;
using value_type = ValueType;
using size_type = std::size_t;
using difference_type = std::ptrdiff_t;
using hasher = Hash;
using key_equal = KeyEqual;
using allocator_type = Allocator;
using reference = value_type &;
using const_reference = const value_type &;
using pointer = value_type *;
using const_pointer = const value_type *;
using iterator = robin_iterator<false>;
using const_iterator = robin_iterator<true>;
private:
/**
* Either store the hash because we are asked by the `StoreHash` template
* parameter or store the hash because it doesn't cost us anything in size and
* can be used to speed up rehash.
*/
static constexpr bool STORE_HASH =
StoreHash ||
((sizeof(tsl::detail_robin_hash::bucket_entry<value_type, true>) ==
sizeof(tsl::detail_robin_hash::bucket_entry<value_type, false>)) &&
(sizeof(std::size_t) == sizeof(truncated_hash_type) ||
is_power_of_two_policy<GrowthPolicy>::value) &&
// Don't store the hash for primitive types with default hash.
(!std::is_arithmetic<key_type>::value ||
!std::is_same<Hash, std::hash<key_type>>::value));
/**
* Only use the stored hash on lookup if we are explicitly asked. We are not
* sure how slow the KeyEqual operation is. An extra comparison may slow
* things down with a fast KeyEqual.
*/
static constexpr bool USE_STORED_HASH_ON_LOOKUP = StoreHash;
/**
* We can only use the hash on rehash if the size of the hash type is the same
* as the stored one or if we use a power of two modulo. In the case of the
* power of two modulo, we just mask the least significant bytes, we just have
* to check that the truncated_hash_type didn't truncate more bytes.
*/
static bool USE_STORED_HASH_ON_REHASH(size_type bucket_count) {
(void)bucket_count;
if (STORE_HASH && sizeof(std::size_t) == sizeof(truncated_hash_type)) {
return true;
} else if (STORE_HASH && is_power_of_two_policy<GrowthPolicy>::value) {
tsl_rh_assert(bucket_count > 0);
return (bucket_count - 1) <=
std::numeric_limits<truncated_hash_type>::max();
} else {
return false;
}
}
// Bucket type of the table; the hash is stored in it only if STORE_HASH.
using bucket_entry =
tsl::detail_robin_hash::bucket_entry<value_type, STORE_HASH>;
// Type used to express the distance of a value from its ideal bucket.
using distance_type = typename bucket_entry::distance_type;
// `allocator_type` rebound so that it allocates `bucket_entry` objects.
using buckets_allocator = typename std::allocator_traits<
allocator_type>::template rebind_alloc<bucket_entry>;
// Container holding the buckets of the table.
using buckets_container_type = std::vector<bucket_entry, buckets_allocator>;
public:
/**
 * Forward iterator over the buckets of the table.
 *
 * The 'operator*()' and 'operator->()' methods return a const reference and
 * const pointer respectively to the stored value type.
 *
 * In case of a map, to get a mutable reference to the value associated to a
 * key (the '.second' in the stored pair), you have to call 'value()'.
 *
 * The main reason for this is that if we returned a `std::pair<Key, T>&`
 * instead of a `const std::pair<Key, T>&`, the user may modify the key which
 * will put the map in an undefined state.
 */
template <bool IsConst> class robin_iterator {
  friend class robin_hash;

private:
  using bucket_entry_ptr =
      typename std::conditional<IsConst, const bucket_entry *,
                                bucket_entry *>::type;

  // Only robin_hash (a friend) creates iterators positioned on a bucket.
  robin_iterator(bucket_entry_ptr bucket) noexcept : m_bucket(bucket) {}

public:
  using iterator_category = std::forward_iterator_tag;
  using value_type = const typename robin_hash::value_type;
  using difference_type = std::ptrdiff_t;
  using reference = value_type &;
  using pointer = value_type *;

  /**
   * Create an iterator which doesn't point to any bucket. The pointer is set
   * to nullptr so that default constructed iterators can be copied and
   * compared with each other without reading an indeterminate value.
   */
  robin_iterator() noexcept : m_bucket(nullptr) {}

  // Copy constructor from iterator to const_iterator.
  template <bool TIsConst = IsConst,
            typename std::enable_if<TIsConst>::type * = nullptr>
  robin_iterator(const robin_iterator<!TIsConst> &other) noexcept
      : m_bucket(other.m_bucket) {}

  robin_iterator(const robin_iterator &other) = default;
  robin_iterator(robin_iterator &&other) = default;
  robin_iterator &operator=(const robin_iterator &other) = default;
  robin_iterator &operator=(robin_iterator &&other) = default;

  // Key of the value stored in the pointed bucket.
  const typename robin_hash::key_type &key() const {
    return KeySelect()(m_bucket->value());
  }

  // Mapped value (maps only), read-only access through a const_iterator.
  template <class U = ValueSelect,
            typename std::enable_if<has_mapped_type<U>::value &&
                                    IsConst>::type * = nullptr>
  const typename U::value_type &value() const {
    return U()(m_bucket->value());
  }

  // Mapped value (maps only), mutable access through an iterator.
  template <class U = ValueSelect,
            typename std::enable_if<has_mapped_type<U>::value &&
                                    !IsConst>::type * = nullptr>
  typename U::value_type &value() {
    return U()(m_bucket->value());
  }

  reference operator*() const { return m_bucket->value(); }
  pointer operator->() const { return std::addressof(m_bucket->value()); }

  /**
   * Move to the next non-empty bucket. If the current bucket is flagged as
   * the last one, the iterator stops one past it (the end position).
   */
  robin_iterator &operator++() {
    while (true) {
      if (m_bucket->last_bucket()) {
        ++m_bucket;
        return *this;
      }
      ++m_bucket;
      if (!m_bucket->empty()) {
        return *this;
      }
    }
  }

  robin_iterator operator++(int) {
    robin_iterator tmp(*this);
    ++*this;
    return tmp;
  }

  // Two iterators are equal when they point to the same bucket.
  friend bool operator==(const robin_iterator &lhs,
                         const robin_iterator &rhs) {
    return lhs.m_bucket == rhs.m_bucket;
  }

  friend bool operator!=(const robin_iterator &lhs,
                         const robin_iterator &rhs) {
    return !(lhs == rhs);
  }

private:
  bucket_entry_ptr m_bucket;
};
public:
#if defined(__cplusplus) && __cplusplus >= 201402L
/**
 * Create a table with `bucket_count` buckets using the given hash functor,
 * key equality functor and allocator.
 *
 * The load factors go through min_load_factor(float) and
 * max_load_factor(float), which clamp them and compute the load threshold.
 *
 * Raises std::length_error through TSL_RH_THROW_OR_TERMINATE if
 * `bucket_count` is greater than max_bucket_count().
 */
robin_hash(size_type bucket_count, const Hash &hash, const KeyEqual &equal,
           const Allocator &alloc,
           float min_load_factor = DEFAULT_MIN_LOAD_FACTOR,
           float max_load_factor = DEFAULT_MAX_LOAD_FACTOR)
    : Hash(hash), KeyEqual(equal), GrowthPolicy(bucket_count),
      m_buckets_data(
          // Validate the size before the vector tries to allocate it.
          [&]() {
            if (bucket_count > max_bucket_count()) {
              TSL_RH_THROW_OR_TERMINATE(
                  std::length_error,
                  "The map exceeds its maximum bucket count.");
            }
            return bucket_count;
          }(),
          alloc),
      m_buckets(m_buckets_data.empty() ? static_empty_bucket_ptr()
                                       : m_buckets_data.data()),
      m_bucket_count(bucket_count), m_nb_elements(0),
      m_grow_on_next_insert(false), m_try_skrink_on_next_insert(false) {
  if (m_bucket_count > 0) {
    tsl_rh_assert(!m_buckets_data.empty());
    // The iterators rely on this flag to know where to stop.
    m_buckets_data.back().set_as_last_bucket();
  }
  this->min_load_factor(min_load_factor);
  this->max_load_factor(max_load_factor);
}
#else
/**
 * Create a table with `bucket_count` buckets using the given hash functor,
 * key equality functor and allocator.
 *
 * C++11 doesn't support the creation of a std::vector with a custom allocator
 * and 'count' default-inserted elements. The needed constructor `explicit
 * vector(size_type count, const Allocator& alloc = Allocator());` is only
 * available in C++14 and later. We thus must resize after using the
 * `vector(const Allocator& alloc)` constructor.
 *
 * We can't use `vector(size_type count, const T& value, const Allocator&
 * alloc)` as it requires the value T to be copyable.
 *
 * Raises std::length_error through TSL_RH_THROW_OR_TERMINATE if
 * `bucket_count` is greater than max_bucket_count().
 */
robin_hash(size_type bucket_count, const Hash &hash, const KeyEqual &equal,
           const Allocator &alloc,
           float min_load_factor = DEFAULT_MIN_LOAD_FACTOR,
           float max_load_factor = DEFAULT_MAX_LOAD_FACTOR)
    : Hash(hash), KeyEqual(equal), GrowthPolicy(bucket_count),
      m_buckets_data(alloc), m_buckets(static_empty_bucket_ptr()),
      m_bucket_count(bucket_count), m_nb_elements(0),
      m_grow_on_next_insert(false), m_try_skrink_on_next_insert(false) {
  if (bucket_count > max_bucket_count()) {
    // Same message as the C++14 constructor.
    TSL_RH_THROW_OR_TERMINATE(std::length_error,
                              "The map exceeds its maximum bucket count.");
  }
  if (m_bucket_count > 0) {
    m_buckets_data.resize(m_bucket_count);
    m_buckets = m_buckets_data.data();
    tsl_rh_assert(!m_buckets_data.empty());
    // The iterators rely on this flag to know where to stop.
    m_buckets_data.back().set_as_last_bucket();
  }
  this->min_load_factor(min_load_factor);
  this->max_load_factor(max_load_factor);
}
#endif
/**
* Copy constructor. Copies the hash functor, the key equality functor, the
* growth policy, the buckets and the bookkeeping fields of `other`.
*
* `m_buckets` is not copied but recomputed from the new `m_buckets_data` (or
* set to the static empty bucket if there is no bucket).
*/
robin_hash(const robin_hash &other)
: Hash(other), KeyEqual(other), GrowthPolicy(other),
m_buckets_data(other.m_buckets_data),
m_buckets(m_buckets_data.empty() ? static_empty_bucket_ptr()
: m_buckets_data.data()),
m_bucket_count(other.m_bucket_count),
m_nb_elements(other.m_nb_elements),
m_load_threshold(other.m_load_threshold),
m_max_load_factor(other.m_max_load_factor),
m_grow_on_next_insert(other.m_grow_on_next_insert),
m_min_load_factor(other.m_min_load_factor),
m_try_skrink_on_next_insert(other.m_try_skrink_on_next_insert) {}
/**
* Move constructor. Takes over the functors, the growth policy and the
* buckets of `other`; noexcept only if moving each of them is noexcept.
*
* `other` is then reset to an empty table without any bucket: its growth
* policy is cleared, `m_buckets` points to the static empty bucket and the
* counters and flags are zeroed. Its load factors are left untouched.
*/
robin_hash(robin_hash &&other) noexcept(
std::is_nothrow_move_constructible<
Hash>::value &&std::is_nothrow_move_constructible<KeyEqual>::value
&&std::is_nothrow_move_constructible<GrowthPolicy>::value &&
std::is_nothrow_move_constructible<buckets_container_type>::value)
: Hash(std::move(static_cast<Hash &>(other))),
KeyEqual(std::move(static_cast<KeyEqual &>(other))),
GrowthPolicy(std::move(static_cast<GrowthPolicy &>(other))),
m_buckets_data(std::move(other.m_buckets_data)),
m_buckets(m_buckets_data.empty() ? static_empty_bucket_ptr()
: m_buckets_data.data()),
m_bucket_count(other.m_bucket_count),
m_nb_elements(other.m_nb_elements),
m_load_threshold(other.m_load_threshold),
m_max_load_factor(other.m_max_load_factor),
m_grow_on_next_insert(other.m_grow_on_next_insert),
m_min_load_factor(other.m_min_load_factor),
m_try_skrink_on_next_insert(other.m_try_skrink_on_next_insert) {
// Leave `other` in a coherent empty state.
other.GrowthPolicy::clear();
other.m_buckets_data.clear();
other.m_buckets = static_empty_bucket_ptr();
other.m_bucket_count = 0;
other.m_nb_elements = 0;
other.m_load_threshold = 0;
other.m_grow_on_next_insert = false;
other.m_try_skrink_on_next_insert = false;
}
/**
 * Copy assignment. Replaces the functors, the growth policy, the buckets and
 * the bookkeeping fields of this table by copies of the ones of `other`.
 * Assigning a table to itself does nothing.
 */
robin_hash &operator=(const robin_hash &other) {
  if (this == &other) {
    return *this;
  }

  Hash::operator=(other);
  KeyEqual::operator=(other);
  GrowthPolicy::operator=(other);

  m_buckets_data = other.m_buckets_data;
  // Re-anchor the raw pointer on our own storage.
  if (m_buckets_data.empty()) {
    m_buckets = static_empty_bucket_ptr();
  } else {
    m_buckets = m_buckets_data.data();
  }

  m_bucket_count = other.m_bucket_count;
  m_nb_elements = other.m_nb_elements;
  m_load_threshold = other.m_load_threshold;
  m_max_load_factor = other.m_max_load_factor;
  m_grow_on_next_insert = other.m_grow_on_next_insert;
  m_min_load_factor = other.m_min_load_factor;
  m_try_skrink_on_next_insert = other.m_try_skrink_on_next_insert;

  return *this;
}
/**
* Move assignment implemented as a swap followed by a clear: the content of
* `other` is swapped into `*this`, then the elements previously held by
* `*this` (now in `other`) are cleared.
*/
robin_hash &operator=(robin_hash &&other) {
other.swap(*this);
other.clear();
return *this;
}
// Return the allocator of the buckets container, converted to allocator_type.
allocator_type get_allocator() const {
return m_buckets_data.get_allocator();
}
/*
* Iterators
*/
/**
 * Iterator to the first non-empty bucket, or to the end position if every
 * bucket is empty.
 */
iterator begin() noexcept {
  std::size_t first_used = 0;
  for (; first_used < m_bucket_count; ++first_used) {
    if (!m_buckets[first_used].empty()) {
      break;
    }
  }
  return iterator(m_buckets + first_used);
}
// Const flavour of begin(), see cbegin().
const_iterator begin() const noexcept { return cbegin(); }

/**
 * Const iterator to the first non-empty bucket, or to the end position if
 * every bucket is empty.
 */
const_iterator cbegin() const noexcept {
  const bucket_entry *cursor = m_buckets;
  const bucket_entry *const past_the_end = m_buckets + m_bucket_count;
  while (cursor != past_the_end && cursor->empty()) {
    ++cursor;
  }
  return const_iterator(cursor);
}
// End position: one past the last bucket of the table.
iterator end() noexcept { return iterator(m_buckets + m_bucket_count); }
const_iterator end() const noexcept { return cend(); }
// Const end position: one past the last bucket of the table.
const_iterator cend() const noexcept {
return const_iterator(m_buckets + m_bucket_count);
}
/*
* Capacity
*/
// True if the table holds no element.
bool empty() const noexcept { return m_nb_elements == 0; }
// Number of elements stored in the table.
size_type size() const noexcept { return m_nb_elements; }
// Maximum size as reported by the buckets container.
size_type max_size() const noexcept { return m_buckets_data.max_size(); }
/*
* Modifiers
*/
/**
 * Remove all the elements. The buckets themselves are kept, so the bucket
 * count is unchanged. A pending grow request is dropped.
 */
void clear() noexcept {
  for (auto it = m_buckets_data.begin(); it != m_buckets_data.end(); ++it) {
    it->clear();
  }
  m_grow_on_next_insert = false;
  m_nb_elements = 0;
}
/**
* Insert `value` if no element with an equivalent key is present.
*
* Return an iterator to the inserted element (or to the element which
* prevented the insertion) and a bool telling if the insertion took place.
*/
template <typename P> std::pair<iterator, bool> insert(P &&value) {
return insert_impl(KeySelect()(value), std::forward<P>(value));
}
/**
 * Insert `value` using `hint` as a shortcut: if `hint` already points to an
 * element with an equivalent key, it is returned directly without any lookup.
 * Otherwise this is a regular insert.
 */
template <typename P> iterator insert_hint(const_iterator hint, P &&value) {
  const bool hint_has_same_key =
      hint != cend() && compare_keys(KeySelect()(*hint), KeySelect()(value));
  if (!hint_has_same_key) {
    return insert(std::forward<P>(value)).first;
  }
  return mutable_iterator(hint);
}
template <class InputIt> void insert(InputIt first, InputIt last) {
if (std::is_base_of<
std::forward_iterator_tag,
typename std::iterator_traits<InputIt>::iterator_category>::value) {
const auto nb_elements_insert = std::distance(first, last);
const size_type nb_free_buckets = m_load_threshold - size();
tsl_rh_assert(m_load_threshold >= size());
if (nb_elements_insert > 0 &&
nb_free_buckets < size_type(nb_elements_insert)) {
reserve(size() + size_type(nb_elements_insert));
}
}
for (; first != last; ++first) {
insert(*first);
}
}
/**
* Insert the pair (key, obj) if the key is not in the table, otherwise assign
* `obj` to the value already associated with the key.
*
* Return an iterator to the element and a bool which is true if an insertion
* took place and false if an assignment took place.
*/
template <class K, class M>
std::pair<iterator, bool> insert_or_assign(K &&key, M &&obj) {
auto it = try_emplace(std::forward<K>(key), std::forward<M>(obj));
// Not inserted: the key was already there, overwrite its value.
if (!it.second) {
it.first.value() = std::forward<M>(obj);
}
return it;
}
/**
* Same as above, but if `hint` already points to an element with an
* equivalent key the assignment is done through it without any lookup.
*/
template <class K, class M>
iterator insert_or_assign(const_iterator hint, K &&key, M &&obj) {
if (hint != cend() && compare_keys(KeySelect()(*hint), key)) {
auto it = mutable_iterator(hint);
it.value() = std::forward<M>(obj);
return it;
}
return insert_or_assign(std::forward<K>(key), std::forward<M>(obj)).first;
}
/**
* Build a value_type from `args` and insert it. The value is constructed
* before the lookup, thus even if the key turns out to be already present.
*/
template <class... Args> std::pair<iterator, bool> emplace(Args &&... args) {
return insert(value_type(std::forward<Args>(args)...));
}
// Same as emplace() but goes through insert_hint() with the given hint.
template <class... Args>
iterator emplace_hint(const_iterator hint, Args &&... args) {
return insert_hint(hint, value_type(std::forward<Args>(args)...));
}
/**
* Insert a value piecewise constructed from `key` and `args` if the key is
* not in the table. The arguments are only forwarded as references here; the
* value itself is built by insert_impl() once the key is known to be absent.
*/
template <class K, class... Args>
std::pair<iterator, bool> try_emplace(K &&key, Args &&... args) {
return insert_impl(key, std::piecewise_construct,
std::forward_as_tuple(std::forward<K>(key)),
std::forward_as_tuple(std::forward<Args>(args)...));
}
/**
* Same as try_emplace(), but if `hint` already points to an element with an
* equivalent key it is returned directly without any lookup.
*/
template <class K, class... Args>
iterator try_emplace_hint(const_iterator hint, K &&key, Args &&... args) {
if (hint != cend() && compare_keys(KeySelect()(*hint), key)) {
return mutable_iterator(hint);
}
return try_emplace(std::forward<K>(key), std::forward<Args>(args)...).first;
}
/**
* Erase the element pointed by `pos` and return an iterator to the element
* which follows it.
*
* Here to avoid `template<class K> size_type erase(const K& key)` being used
* when we use an `iterator` instead of a `const_iterator`.
*/
iterator erase(iterator pos) {
erase_from_bucket(pos);
/**
* Erase bucket used a backward shift after clearing the bucket.
* Check if there is a new value in the bucket, if not get the next
* non-empty.
*/
if (pos.m_bucket->empty()) {
++pos;
}
// The table may now be under the min load factor, check on next insert.
m_try_skrink_on_next_insert = true;
return pos;
}
// Const iterator flavour, forwards to erase(iterator).
iterator erase(const_iterator pos) { return erase(mutable_iterator(pos)); }
/**
* Erase the elements in the range [first, last) and return an iterator to the
* position where the element which followed the range ends up (end() if
* `last` was end()).
*
* The buckets of the range are cleared first, then the values located after
* the range are shifted backward to fill the hole.
*/
iterator erase(const_iterator first, const_iterator last) {
if (first == last) {
return mutable_iterator(first);
}
auto first_mutable = mutable_iterator(first);
auto last_mutable = mutable_iterator(last);
// Clear every occupied bucket of the range.
for (auto it = first_mutable.m_bucket; it != last_mutable.m_bucket; ++it) {
if (!it->empty()) {
it->clear();
m_nb_elements--;
}
}
// Nothing after the range, no value to shift.
if (last_mutable == end()) {
return end();
}
/*
* Backward shift on the values which come after the deleted values.
* We try to move the values closer to their ideal bucket.
*/
// Index of the first free bucket a value may be moved to.
std::size_t icloser_bucket =
static_cast<std::size_t>(first_mutable.m_bucket - m_buckets);
// Index of the next value to move backward.
std::size_t ito_move_closer_value =
static_cast<std::size_t>(last_mutable.m_bucket - m_buckets);
tsl_rh_assert(ito_move_closer_value > icloser_bucket);
// Position where the bucket at `last` lands: it moves back by at most the
// size of the hole and never before its ideal bucket.
const std::size_t ireturn_bucket =
ito_move_closer_value -
std::min(
ito_move_closer_value - icloser_bucket,
std::size_t(
m_buckets[ito_move_closer_value].dist_from_ideal_bucket()));
// Stop at the end of the buckets or at a bucket whose distance from its
// ideal bucket is not greater than 0.
while (ito_move_closer_value < m_bucket_count &&
m_buckets[ito_move_closer_value].dist_from_ideal_bucket() > 0) {
icloser_bucket =
ito_move_closer_value -
std::min(
ito_move_closer_value - icloser_bucket,
std::size_t(
m_buckets[ito_move_closer_value].dist_from_ideal_bucket()));
tsl_rh_assert(m_buckets[icloser_bucket].empty());
// The value gets closer to its ideal bucket by the length of the move.
const distance_type new_distance = distance_type(
m_buckets[ito_move_closer_value].dist_from_ideal_bucket() -
(ito_move_closer_value - icloser_bucket));
m_buckets[icloser_bucket].set_value_of_empty_bucket(
new_distance, m_buckets[ito_move_closer_value].truncated_hash(),
std::move(m_buckets[ito_move_closer_value].value()));
m_buckets[ito_move_closer_value].clear();
++icloser_bucket;
++ito_move_closer_value;
}
// The table may now be under the min load factor, check on next insert.
m_try_skrink_on_next_insert = true;
return iterator(m_buckets + ireturn_bucket);
}
// Erase the element with the given key, return the number of erased elements.
template <class K> size_type erase(const K &key) {
  return erase(key, hash_key(key));
}

/**
 * Erase the element with the given key, using a hash computed by the caller.
 * Return 1 if an element was erased, 0 if the key was not found.
 */
template <class K> size_type erase(const K &key, std::size_t hash) {
  const auto found = find(key, hash);
  if (found == end()) {
    return 0;
  }

  erase_from_bucket(found);
  // The table may now be under the min load factor, check on next insert.
  m_try_skrink_on_next_insert = true;
  return 1;
}
/**
* Exchange the whole state of the two tables: functors, growth policy,
* buckets and bookkeeping fields.
*/
void swap(robin_hash &other) {
// Let ADL pick a custom swap for the functors if there is one.
using std::swap;
swap(static_cast<Hash &>(*this), static_cast<Hash &>(other));
swap(static_cast<KeyEqual &>(*this), static_cast<KeyEqual &>(other));
swap(static_cast<GrowthPolicy &>(*this),
static_cast<GrowthPolicy &>(other));
swap(m_buckets_data, other.m_buckets_data);
swap(m_buckets, other.m_buckets);
swap(m_bucket_count, other.m_bucket_count);
swap(m_nb_elements, other.m_nb_elements);
swap(m_load_threshold, other.m_load_threshold);
swap(m_max_load_factor, other.m_max_load_factor);
swap(m_grow_on_next_insert, other.m_grow_on_next_insert);
swap(m_min_load_factor, other.m_min_load_factor);
swap(m_try_skrink_on_next_insert, other.m_try_skrink_on_next_insert);
}
/*
* Lookup
*/
/**
* Mutable access to the value mapped to `key`. Only available when
* ValueSelect has a mapped type (i.e. for maps).
*/
template <
class K, class U = ValueSelect,
typename std::enable_if<has_mapped_type<U>::value>::type * = nullptr>
typename U::value_type &at(const K &key) {
return at(key, hash_key(key));
}
// Same as above with a hash computed by the caller.
template <
class K, class U = ValueSelect,
typename std::enable_if<has_mapped_type<U>::value>::type * = nullptr>
typename U::value_type &at(const K &key, std::size_t hash) {
// Reuse the const overload and cast the constness away.
return const_cast<typename U::value_type &>(
static_cast<const robin_hash *>(this)->at(key, hash));
}
// Read-only access to the value mapped to `key`.
template <
class K, class U = ValueSelect,
typename std::enable_if<has_mapped_type<U>::value>::type * = nullptr>
const typename U::value_type &at(const K &key) const {
return at(key, hash_key(key));
}
/**
* Read-only access to the value mapped to `key`, with a hash computed by the
* caller. Raises std::out_of_range through TSL_RH_THROW_OR_TERMINATE if the
* key is not in the table.
*/
template <
class K, class U = ValueSelect,
typename std::enable_if<has_mapped_type<U>::value>::type * = nullptr>
const typename U::value_type &at(const K &key, std::size_t hash) const {
auto it = find(key, hash);
if (it != cend()) {
return it.value();
} else {
TSL_RH_THROW_OR_TERMINATE(std::out_of_range, "Couldn't find key.");
}
}
/**
* Access to the value mapped to `key`. If the key is not in the table, a
* value constructed without any argument is inserted first (try_emplace).
*/
template <
class K, class U = ValueSelect,
typename std::enable_if<has_mapped_type<U>::value>::type * = nullptr>
typename U::value_type &operator[](K &&key) {
return try_emplace(std::forward<K>(key)).first.value();
}
// Number of elements with the given key: 1 if present, 0 otherwise.
template <class K> size_type count(const K &key) const {
  return count(key, hash_key(key));
}

// Same as above with a hash computed by the caller.
template <class K> size_type count(const K &key, std::size_t hash) const {
  return (find(key, hash) == cend()) ? 0 : 1;
}
/**
* Find the element with the given key. Return end() (cend() for the const
* overloads) if there is no such element. The overloads taking `hash` use it
* instead of hashing the key themselves.
*/
template <class K> iterator find(const K &key) {
return find_impl(key, hash_key(key));
}
template <class K> iterator find(const K &key, std::size_t hash) {
return find_impl(key, hash);
}
template <class K> const_iterator find(const K &key) const {
return find_impl(key, hash_key(key));
}
template <class K> const_iterator find(const K &key, std::size_t hash) const {
return find_impl(key, hash);
}
template <class K> std::pair<iterator, iterator> equal_range(const K &key) {
return equal_range(key, hash_key(key));
}
template <class K>
std::pair<iterator, iterator> equal_range(const K &key, std::size_t hash) {
iterator it = find(key, hash);
return std::make_pair(it, (it == end()) ? it : std::next(it));
}
template <class K>
std::pair<const_iterator, const_iterator> equal_range(const K &key) const {
return equal_range(key, hash_key(key));
}
template <class K>
std::pair<const_iterator, const_iterator>
equal_range(const K &key, std::size_t hash) const {
const_iterator it = find(key, hash);
return std::make_pair(it, (it == cend()) ? it : std::next(it));
}
/*
* Bucket interface
*/
// Current number of buckets of the table.
size_type bucket_count() const { return m_bucket_count; }
// Maximum number of buckets: the lowest of the growth policy limit and of the
// buckets container limit.
size_type max_bucket_count() const {
return std::min(GrowthPolicy::max_bucket_count(),
m_buckets_data.max_size());
}
/*
* Hash policy
*/
float load_factor() const {
if (bucket_count() == 0) {
return 0;
}
return float(m_nb_elements) / float(bucket_count());
}
float min_load_factor() const { return m_min_load_factor; }
float max_load_factor() const { return m_max_load_factor; }
// Set the min load factor, clamped to
// [MINIMUM_MIN_LOAD_FACTOR, MAXIMUM_MIN_LOAD_FACTOR].
void min_load_factor(float ml) {
m_min_load_factor = clamp(ml, float(MINIMUM_MIN_LOAD_FACTOR),
float(MAXIMUM_MIN_LOAD_FACTOR));
}
// Set the max load factor, clamped to
// [MINIMUM_MAX_LOAD_FACTOR, MAXIMUM_MAX_LOAD_FACTOR], and recompute the load
// threshold for the current number of buckets.
void max_load_factor(float ml) {
m_max_load_factor = clamp(ml, float(MINIMUM_MAX_LOAD_FACTOR),
float(MAXIMUM_MAX_LOAD_FACTOR));
m_load_threshold = size_type(float(bucket_count()) * m_max_load_factor);
}
/**
* Rebuild the table with `count` buckets, or with more if `count` is too low
* to hold the current elements under the max load factor.
*/
void rehash(size_type count) {
count = std::max(count,
size_type(std::ceil(float(size()) / max_load_factor())));
rehash_impl(count);
}
// Rehash so that `count` elements fit under the max load factor.
void reserve(size_type count) {
rehash(size_type(std::ceil(float(count) / max_load_factor())));
}
/*
* Observers
*/
// Copy of the hash functor (robin_hash derives from Hash).
hasher hash_function() const { return static_cast<const Hash &>(*this); }
// Copy of the key equality functor (robin_hash derives from KeyEqual).
key_equal key_eq() const { return static_cast<const KeyEqual &>(*this); }
/*
* Other
*/
// Convert a const_iterator into an iterator pointing to the same bucket.
iterator mutable_iterator(const_iterator pos) {
return iterator(const_cast<bucket_entry *>(pos.m_bucket));
}
private:
// Hash `key` with the Hash functor this class derives from.
template <class K> std::size_t hash_key(const K &key) const {
return Hash::operator()(key);
}
// Compare two keys with the KeyEqual functor this class derives from.
template <class K1, class K2>
bool compare_keys(const K1 &key1, const K2 &key2) const {
return KeyEqual::operator()(key1, key2);
}
/**
* Index of the ideal bucket for `hash`, as decided by the growth policy.
* With a table without any bucket the policy is expected to return 0.
*/
std::size_t bucket_for_hash(std::size_t hash) const {
const std::size_t bucket = GrowthPolicy::bucket_for_hash(hash);
tsl_rh_assert(bucket < m_bucket_count ||
(bucket == 0 && m_bucket_count == 0));
return bucket;
}
/**
 * Index of the bucket which follows `index`, wrapping around to the first
 * bucket after the last one. Power of two policy flavour: the wrap around is
 * done with the mask of the policy.
 */
template <class U = GrowthPolicy,
          typename std::enable_if<is_power_of_two_policy<U>::value>::type * =
              nullptr>
std::size_t next_bucket(std::size_t index) const noexcept {
  tsl_rh_assert(index < bucket_count());
  const std::size_t following = index + 1;
  return following & this->m_mask;
}

/**
 * Index of the bucket which follows `index`, wrapping around to the first
 * bucket after the last one. Flavour for the other growth policies.
 */
template <class U = GrowthPolicy,
          typename std::enable_if<!is_power_of_two_policy<U>::value>::type * =
              nullptr>
std::size_t next_bucket(std::size_t index) const noexcept {
  tsl_rh_assert(index < bucket_count());
  const std::size_t following = index + 1;
  return (following == bucket_count()) ? 0 : following;
}
// Non-const lookup: reuse the const lookup and convert the result.
template <class K> iterator find_impl(const K &key, std::size_t hash) {
return mutable_iterator(
static_cast<const robin_hash *>(this)->find(key, hash));
}
/**
* Look for `key`, starting from the ideal bucket of `hash` and probing the
* following buckets. Return cend() if the key is not found.
*
* The probing stops as soon as the probed bucket holds a value closer to its
* own ideal bucket than the key would be at this position.
* NOTE(review): stopping on an empty bucket presumably relies on the distance
* reported by an empty bucket_entry; confirm against bucket_entry.
*/
template <class K>
const_iterator find_impl(const K &key, std::size_t hash) const {
std::size_t ibucket = bucket_for_hash(hash);
distance_type dist_from_ideal_bucket = 0;
while (dist_from_ideal_bucket <=
m_buckets[ibucket].dist_from_ideal_bucket()) {
// The stored hash is compared first only if USE_STORED_HASH_ON_LOOKUP.
if (TSL_RH_LIKELY(
(!USE_STORED_HASH_ON_LOOKUP ||
m_buckets[ibucket].bucket_hash_equal(hash)) &&
compare_keys(KeySelect()(m_buckets[ibucket].value()), key))) {
return const_iterator(m_buckets + ibucket);
}
ibucket = next_bucket(ibucket);
dist_from_ideal_bucket++;
}
return cend();
}
/**
* Clear the bucket pointed by `pos`, decrement the number of elements and
* shift the following values backward to fill the hole.
*/
void erase_from_bucket(iterator pos) {
pos.m_bucket->clear();
m_nb_elements--;
/**
* Backward shift, swap the empty bucket, previous_ibucket, with the values
* on its right, ibucket, until we cross another empty bucket or if the
* other bucket has a distance_from_ideal_bucket == 0.
*
* We try to move the values closer to their ideal bucket.
*/
std::size_t previous_ibucket =
static_cast<std::size_t>(pos.m_bucket - m_buckets);
std::size_t ibucket = next_bucket(previous_ibucket);
while (m_buckets[ibucket].dist_from_ideal_bucket() > 0) {
tsl_rh_assert(m_buckets[previous_ibucket].empty());
// Moving one bucket backward brings the value one step closer.
const distance_type new_distance =
distance_type(m_buckets[ibucket].dist_from_ideal_bucket() - 1);
m_buckets[previous_ibucket].set_value_of_empty_bucket(
new_distance, m_buckets[ibucket].truncated_hash(),
std::move(m_buckets[ibucket].value()));
m_buckets[ibucket].clear();
previous_ibucket = ibucket;
ibucket = next_bucket(ibucket);
}
}
/**
* Insert a value built from `value_type_args` if `key` is not in the table.
*
* Return an iterator to the inserted value and true, or an iterator to the
* value which already has this key and false. In the second case the
* arguments are left untouched.
*/
template <class K, class... Args>
std::pair<iterator, bool> insert_impl(const K &key,
Args &&... value_type_args) {
const std::size_t hash = hash_key(key);
std::size_t ibucket = bucket_for_hash(hash);
distance_type dist_from_ideal_bucket = 0;
// Look for the key, same probing as in find_impl().
while (dist_from_ideal_bucket <=
m_buckets[ibucket].dist_from_ideal_bucket()) {
if ((!USE_STORED_HASH_ON_LOOKUP ||
m_buckets[ibucket].bucket_hash_equal(hash)) &&
compare_keys(KeySelect()(m_buckets[ibucket].value()), key)) {
return std::make_pair(iterator(m_buckets + ibucket), false);
}
ibucket = next_bucket(ibucket);
dist_from_ideal_bucket++;
}
// The table was rehashed: the insertion position has to be searched again.
// The key is known to be absent, no need to compare the keys this time.
if (rehash_on_extreme_load()) {
ibucket = bucket_for_hash(hash);
dist_from_ideal_bucket = 0;
while (dist_from_ideal_bucket <=
m_buckets[ibucket].dist_from_ideal_bucket()) {
ibucket = next_bucket(ibucket);
dist_from_ideal_bucket++;
}
}
if (m_buckets[ibucket].empty()) {
m_buckets[ibucket].set_value_of_empty_bucket(
dist_from_ideal_bucket, bucket_entry::truncate_hash(hash),
std::forward<Args>(value_type_args)...);
} else {
// Occupied bucket: take its place and push its value further.
insert_value(ibucket, dist_from_ideal_bucket,
bucket_entry::truncate_hash(hash),
std::forward<Args>(value_type_args)...);
}
m_nb_elements++;
/*
* The value will be inserted in ibucket in any case, either because it was
* empty or by stealing the bucket (robin hood).
*/
return std::make_pair(iterator(m_buckets + ibucket), true);
}
/**
* Build a value_type from `value_type_args` and insert it in the occupied
* bucket `ibucket` through insert_value_impl().
*/
template <class... Args>
void insert_value(std::size_t ibucket, distance_type dist_from_ideal_bucket,
truncated_hash_type hash, Args &&... value_type_args) {
value_type value(std::forward<Args>(value_type_args)...);
insert_value_impl(ibucket, dist_from_ideal_bucket, hash, value);
}
// Overload for an already built value, passed on without an extra copy.
void insert_value(std::size_t ibucket, distance_type dist_from_ideal_bucket,
truncated_hash_type hash, value_type &&value) {
insert_value_impl(ibucket, dist_from_ideal_bucket, hash, value);
}
/*
* Put `value` in the occupied bucket `ibucket` and carry the evicted value to
* the following buckets, swapping again each time the carried value is
* further from its ideal bucket than the value in place, until an empty
* bucket is found.
*
* We don't use `value_type&& value` as last argument due to a bug in MSVC
* when `value_type` is a pointer, The compiler is not able to see the
* difference between `std::string*` and `std::string*&&` resulting in compile
* error.
*
* The `value` will be in a moved state at the end of the function.
*/
void insert_value_impl(std::size_t ibucket,
distance_type dist_from_ideal_bucket,
truncated_hash_type hash, value_type &value) {
// After the swap, `dist_from_ideal_bucket`, `hash` and `value` are used as
// the data of the evicted value for the rest of the function.
m_buckets[ibucket].swap_with_value_in_bucket(dist_from_ideal_bucket, hash,
value);
ibucket = next_bucket(ibucket);
dist_from_ideal_bucket++;
while (!m_buckets[ibucket].empty()) {
if (dist_from_ideal_bucket >
m_buckets[ibucket].dist_from_ideal_bucket()) {
if (dist_from_ideal_bucket >= REHASH_ON_HIGH_NB_PROBES__NPROBES &&
load_factor() >= REHASH_ON_HIGH_NB_PROBES__MIN_LOAD_FACTOR) {
/**
* The number of probes is really high, rehash the map on the next
* insert. Difficult to do now as rehash may throw an exception.
*/
m_grow_on_next_insert = true;
}
m_buckets[ibucket].swap_with_value_in_bucket(dist_from_ideal_bucket,
hash, value);
}
ibucket = next_bucket(ibucket);
dist_from_ideal_bucket++;
}
m_buckets[ibucket].set_value_of_empty_bucket(dist_from_ideal_bucket, hash,
std::move(value));
}
/**
* Rebuild the table with `count` buckets: a new table is created with the
* same functors, allocator and load factors, every value is moved into it,
* then the new table is swapped with this one.
*/
void rehash_impl(size_type count) {
robin_hash new_table(count, static_cast<Hash &>(*this),
static_cast<KeyEqual &>(*this), get_allocator(),
m_min_load_factor, m_max_load_factor);
const bool use_stored_hash =
USE_STORED_HASH_ON_REHASH(new_table.bucket_count());
for (auto &bucket : m_buckets_data) {
if (bucket.empty()) {
continue;
}
// Reuse the stored hash when possible instead of hashing the key again.
const std::size_t hash =
use_stored_hash ? bucket.truncated_hash()
: new_table.hash_key(KeySelect()(bucket.value()));
new_table.insert_value_on_rehash(new_table.bucket_for_hash(hash), 0,
bucket_entry::truncate_hash(hash),
std::move(bucket.value()));
}
// insert_value_on_rehash() doesn't update the number of elements.
new_table.m_nb_elements = m_nb_elements;
new_table.swap(*this);
}
/**
* Insert `value` starting the probing at `ibucket`. Used by rehash_impl():
* no key is compared and the number of elements is not updated.
*
* The probing goes on, swapping with the values in place which are closer to
* their ideal bucket than the carried value, until an empty bucket is found.
*/
void insert_value_on_rehash(std::size_t ibucket,
distance_type dist_from_ideal_bucket,
truncated_hash_type hash, value_type &&value) {
while (true) {
if (dist_from_ideal_bucket >
m_buckets[ibucket].dist_from_ideal_bucket()) {
if (m_buckets[ibucket].empty()) {
m_buckets[ibucket].set_value_of_empty_bucket(dist_from_ideal_bucket,
hash, std::move(value));
return;
} else {
// Carry on with the value which was in the bucket.
m_buckets[ibucket].swap_with_value_in_bucket(dist_from_ideal_bucket,
hash, value);
}
}
dist_from_ideal_bucket++;
ibucket = next_bucket(ibucket);
}
}
/**
* Grow the table if m_grow_on_next_insert is true or we reached the
* max_load_factor. Shrink the table if m_try_skrink_on_next_insert is true
* (an erase occurred) and we're below the min_load_factor.
*
* Return true if the table has been rehashed.
*/
bool rehash_on_extreme_load() {
if (m_grow_on_next_insert || size() >= m_load_threshold) {
rehash_impl(GrowthPolicy::next_bucket_count());
m_grow_on_next_insert = false;
return true;
}
if (m_try_skrink_on_next_insert) {
// The shrink is only tried once per erase.
m_try_skrink_on_next_insert = false;
if (m_min_load_factor != 0.0f && load_factor() < m_min_load_factor) {
// Keep room for the element which is about to be inserted.
reserve(size() + 1);
return true;
}
}
return false;
}
public:
// Default number of buckets: nothing is allocated by default.
static const size_type DEFAULT_INIT_BUCKETS_SIZE = 0;
// Default max load factor and the bounds max_load_factor(float) clamps to.
static constexpr float DEFAULT_MAX_LOAD_FACTOR = 0.5f;
static constexpr float MINIMUM_MAX_LOAD_FACTOR = 0.2f;
static constexpr float MAXIMUM_MAX_LOAD_FACTOR = 0.95f;
// Default min load factor and the bounds min_load_factor(float) clamps to.
// With a min load factor of 0 the table is never shrunk on insert.
static constexpr float DEFAULT_MIN_LOAD_FACTOR = 0.0f;
static constexpr float MINIMUM_MIN_LOAD_FACTOR = 0.0f;
static constexpr float MAXIMUM_MIN_LOAD_FACTOR = 0.15f;
// The two ranges must be valid and must not overlap.
static_assert(MINIMUM_MAX_LOAD_FACTOR < MAXIMUM_MAX_LOAD_FACTOR,
"MINIMUM_MAX_LOAD_FACTOR should be < MAXIMUM_MAX_LOAD_FACTOR");
static_assert(MINIMUM_MIN_LOAD_FACTOR < MAXIMUM_MIN_LOAD_FACTOR,
"MINIMUM_MIN_LOAD_FACTOR should be < MAXIMUM_MIN_LOAD_FACTOR");
static_assert(MAXIMUM_MIN_LOAD_FACTOR < MINIMUM_MAX_LOAD_FACTOR,
"MAXIMUM_MIN_LOAD_FACTOR should be < MINIMUM_MAX_LOAD_FACTOR");
private:
// Thresholds used by insert_value_impl(): a grow is requested for the next
// insert when a value is displaced at this distance from its ideal bucket
// (or further) while the load factor is at least the one below.
static const distance_type REHASH_ON_HIGH_NB_PROBES__NPROBES = 128;
static constexpr float REHASH_ON_HIGH_NB_PROBES__MIN_LOAD_FACTOR = 0.15f;
/**
* Return an always valid pointer to a static empty bucket_entry with
* last_bucket() == true.
*
* The bucket is a function-local static, so the same object is returned on
* every call for a given robin_hash instantiation.
*/
bucket_entry *static_empty_bucket_ptr() {
static bucket_entry empty_bucket(true);
return &empty_bucket;
}
private:
// Storage of the buckets.
buckets_container_type m_buckets_data;
/**
* Points to m_buckets_data.data() if !m_buckets_data.empty() otherwise points
* to static_empty_bucket_ptr. This variable is useful to avoid the cost of
* checking if m_buckets_data is empty when trying to find an element.
*
* TODO Remove m_buckets_data and only use a pointer instead of a
* pointer+vector to save some space in the robin_hash object. Manage the
* Allocator manually.
*/
bucket_entry *m_buckets;
/**
* Used a lot in find, avoid the call to m_buckets_data.size() which is a bit
* slower.
*/
size_type m_bucket_count;
// Number of values stored in the table.
size_type m_nb_elements;
// Number of elements from which an insert grows the table. Computed by
// max_load_factor(float) as bucket_count() * m_max_load_factor.
size_type m_load_threshold;
float m_max_load_factor;
// Set by insert_value_impl() when a probe sequence gets too long, forces a
// grow on the next insert.
bool m_grow_on_next_insert;
float m_min_load_factor;
/**
* We can't shrink down the map on erase operations as the erase methods need
* to return the next iterator. Shrinking the map would invalidate all the
* iterators and we could not return the next iterator in a meaningful way, On
* erase, we thus just indicate on erase that we should try to shrink the hash
* table on the next insert if we go below the min_load_factor.
*/
bool m_try_skrink_on_next_insert;
};
} // namespace detail_robin_hash
} // namespace tsl
#endif
/**
* MIT License
*
* Copyright (c) 2017 Tessil
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#ifndef TSL_ROBIN_MAP_H
#define TSL_ROBIN_MAP_H
#include "robin_hash.h"
#include <cstddef>
#include <functional>
#include <initializer_list>
#include <memory>
#include <type_traits>
#include <utility>
namespace tsl {
/**
* Implementation of a hash map using open-addressing and the robin hood hashing
* algorithm with backward shift deletion.
*
* For operations modifying the hash map (insert, erase, rehash, ...), the
* strong exception guarantee is only guaranteed when the expression
* `std::is_nothrow_swappable<std::pair<Key, T>>::value &&
* std::is_nothrow_move_constructible<std::pair<Key, T>>::value` is true,
* otherwise if an exception is thrown during the swap or the move, the hash map
* may end up in an undefined state. Per the standard a `Key` or `T` with a
* noexcept copy constructor and no move constructor also satisfies the
* `std::is_nothrow_move_constructible<std::pair<Key, T>>::value` criterion (and
* will thus guarantee the strong exception for the map).
*
* When `StoreHash` is true, 32 bits of the hash are stored alongside the
* values. It can improve the performance during lookups if the `KeyEqual`
* function takes time (if it engenders a cache-miss for example) as we then
* compare the stored hashes before comparing the keys. When
* `tsl::rh::power_of_two_growth_policy` is used as `GrowthPolicy`, it may also
* speed-up the rehash process as we can avoid to recalculate the hash. When it
* is detected that storing the hash will not incur any memory penalty due to
* alignment (i.e. `sizeof(tsl::detail_robin_hash::bucket_entry<ValueType,
* true>) == sizeof(tsl::detail_robin_hash::bucket_entry<ValueType, false>)`)
* and `tsl::rh::power_of_two_growth_policy` is used, the hash will be stored
* even if `StoreHash` is false so that we can speed-up the rehash (but it will
* not be used on lookups unless `StoreHash` is true).
*
* `GrowthPolicy` defines how the map grows and consequently how a hash value is
* mapped to a bucket. By default the map uses
* `tsl::rh::power_of_two_growth_policy`. This policy keeps the number of
* buckets to a power of two and uses a mask to map the hash to a bucket instead
* of the slow modulo. Other growth policies are available and you may define
* your own growth policy, check `tsl::rh::power_of_two_growth_policy` for the
* interface.
*
* `std::pair<Key, T>` must be swappable.
*
* `Key` and `T` must be copy and/or move constructible.
*
* If the destructor of `Key` or `T` throws an exception, the behaviour of the
* class is undefined.
*
* Iterators invalidation:
* - clear, operator=, reserve, rehash: always invalidate the iterators.
* - insert, emplace, emplace_hint, operator[]: if there is an effective
* insert, invalidate the iterators.
* - erase: always invalidate the iterators.
*/
template <class Key, class T, class Hash = std::hash<Key>,
class KeyEqual = std::equal_to<Key>,
class Allocator = std::allocator<std::pair<Key, T>>,
bool StoreHash = false,
class GrowthPolicy = tsl::rh::power_of_two_growth_policy<2>>
class robin_map {
private:
template <typename U>
using has_is_transparent = tsl::detail_robin_hash::has_is_transparent<U>;
// Functor extracting the key (`.first`) of a stored `std::pair<Key, T>`.
class KeySelect {
public:
using key_type = Key;
const key_type &
operator()(const std::pair<Key, T> &key_value) const noexcept {
return key_value.first;
}
key_type &operator()(std::pair<Key, T> &key_value) noexcept {
return key_value.first;
}
};
// Functor extracting the mapped value (`.second`) of a stored
// `std::pair<Key, T>`.
class ValueSelect {
public:
using value_type = T;
const value_type &
operator()(const std::pair<Key, T> &key_value) const noexcept {
return key_value.second;
}
value_type &operator()(std::pair<Key, T> &key_value) noexcept {
return key_value.second;
}
};
using ht = detail_robin_hash::robin_hash<std::pair<Key, T>, KeySelect,
ValueSelect, Hash, KeyEqual,
Allocator, StoreHash, GrowthPolicy>;
public:
using key_type = typename ht::key_type;
using mapped_type = T;
using value_type = typename ht::value_type;
using size_type = typename ht::size_type;
using difference_type = typename ht::difference_type;
using hasher = typename ht::hasher;
using key_equal = typename ht::key_equal;
using allocator_type = typename ht::allocator_type;
using reference = typename ht::reference;
using const_reference = typename ht::const_reference;
using pointer = typename ht::pointer;
using const_pointer = typename ht::const_pointer;
using iterator = typename ht::iterator;
using const_iterator = typename ht::const_iterator;
public:
/*
* Constructors
*/
robin_map() : robin_map(ht::DEFAULT_INIT_BUCKETS_SIZE) {}
explicit robin_map(size_type bucket_count, const Hash &hash = Hash(),
const KeyEqual &equal = KeyEqual(),
const Allocator &alloc = Allocator())
: m_ht(bucket_count, hash, equal, alloc) {}
robin_map(size_type bucket_count, const Allocator &alloc)
: robin_map(bucket_count, Hash(), KeyEqual(), alloc) {}
robin_map(size_type bucket_count, const Hash &hash, const Allocator &alloc)
: robin_map(bucket_count, hash, KeyEqual(), alloc) {}
explicit robin_map(const Allocator &alloc)
: robin_map(ht::DEFAULT_INIT_BUCKETS_SIZE, alloc) {}
template <class InputIt>
robin_map(InputIt first, InputIt last,
size_type bucket_count = ht::DEFAULT_INIT_BUCKETS_SIZE,
const Hash &hash = Hash(), const KeyEqual &equal = KeyEqual(),
const Allocator &alloc = Allocator())
: robin_map(bucket_count, hash, equal, alloc) {
insert(first, last);
}
template <class InputIt>
robin_map(InputIt first, InputIt last, size_type bucket_count,
const Allocator &alloc)
: robin_map(first, last, bucket_count, Hash(), KeyEqual(), alloc) {}
template <class InputIt>
robin_map(InputIt first, InputIt last, size_type bucket_count,
const Hash &hash, const Allocator &alloc)
: robin_map(first, last, bucket_count, hash, KeyEqual(), alloc) {}
robin_map(std::initializer_list<value_type> init,
size_type bucket_count = ht::DEFAULT_INIT_BUCKETS_SIZE,
const Hash &hash = Hash(), const KeyEqual &equal = KeyEqual(),
const Allocator &alloc = Allocator())
: robin_map(init.begin(), init.end(), bucket_count, hash, equal, alloc) {}
robin_map(std::initializer_list<value_type> init, size_type bucket_count,
const Allocator &alloc)
: robin_map(init.begin(), init.end(), bucket_count, Hash(), KeyEqual(),
alloc) {}
robin_map(std::initializer_list<value_type> init, size_type bucket_count,
const Hash &hash, const Allocator &alloc)
: robin_map(init.begin(), init.end(), bucket_count, hash, KeyEqual(),
alloc) {}
robin_map &operator=(std::initializer_list<value_type> ilist) {
m_ht.clear();
m_ht.reserve(ilist.size());
m_ht.insert(ilist.begin(), ilist.end());
return *this;
}
allocator_type get_allocator() const { return m_ht.get_allocator(); }
/*
* Iterators
*/
iterator begin() noexcept { return m_ht.begin(); }
const_iterator begin() const noexcept { return m_ht.begin(); }
const_iterator cbegin() const noexcept { return m_ht.cbegin(); }
iterator end() noexcept { return m_ht.end(); }
const_iterator end() const noexcept { return m_ht.end(); }
const_iterator cend() const noexcept { return m_ht.cend(); }
/*
* Capacity
*/
bool empty() const noexcept { return m_ht.empty(); }
size_type size() const noexcept { return m_ht.size(); }
size_type max_size() const noexcept { return m_ht.max_size(); }
/*
* Modifiers
*/
void clear() noexcept { m_ht.clear(); }
std::pair<iterator, bool> insert(const value_type &value) {
return m_ht.insert(value);
}
template <class P, typename std::enable_if<std::is_constructible<
value_type, P &&>::value>::type * = nullptr>
std::pair<iterator, bool> insert(P &&value) {
return m_ht.emplace(std::forward<P>(value));
}
std::pair<iterator, bool> insert(value_type &&value) {
return m_ht.insert(std::move(value));
}
iterator insert(const_iterator hint, const value_type &value) {
return m_ht.insert_hint(hint, value);
}
template <class P, typename std::enable_if<std::is_constructible<
value_type, P &&>::value>::type * = nullptr>
iterator insert(const_iterator hint, P &&value) {
return m_ht.emplace_hint(hint, std::forward<P>(value));
}
iterator insert(const_iterator hint, value_type &&value) {
return m_ht.insert_hint(hint, std::move(value));
}
template <class InputIt> void insert(InputIt first, InputIt last) {
m_ht.insert(first, last);
}
void insert(std::initializer_list<value_type> ilist) {
m_ht.insert(ilist.begin(), ilist.end());
}
template <class M>
std::pair<iterator, bool> insert_or_assign(const key_type &k, M &&obj) {
return m_ht.insert_or_assign(k, std::forward<M>(obj));
}
template <class M>
std::pair<iterator, bool> insert_or_assign(key_type &&k, M &&obj) {
return m_ht.insert_or_assign(std::move(k), std::forward<M>(obj));
}
template <class M>
iterator insert_or_assign(const_iterator hint, const key_type &k, M &&obj) {
return m_ht.insert_or_assign(hint, k, std::forward<M>(obj));
}
template <class M>
iterator insert_or_assign(const_iterator hint, key_type &&k, M &&obj) {
return m_ht.insert_or_assign(hint, std::move(k), std::forward<M>(obj));
}
/**
* Due to the way elements are stored, emplace will need to move or copy the
* key-value once. The method is equivalent to
* insert(value_type(std::forward<Args>(args)...));
*
* Mainly here for compatibility with the std::unordered_map interface.
*/
template <class... Args> std::pair<iterator, bool> emplace(Args &&... args) {
return m_ht.emplace(std::forward<Args>(args)...);
}
/**
* Due to the way elements are stored, emplace_hint will need to move or copy
* the key-value once. The method is equivalent to insert(hint,
* value_type(std::forward<Args>(args)...));
*
* Mainly here for compatibility with the std::unordered_map interface.
*/
template <class... Args>
iterator emplace_hint(const_iterator hint, Args &&... args) {
return m_ht.emplace_hint(hint, std::forward<Args>(args)...);
}
template <class... Args>
std::pair<iterator, bool> try_emplace(const key_type &k, Args &&... args) {
return m_ht.try_emplace(k, std::forward<Args>(args)...);
}
template <class... Args>
std::pair<iterator, bool> try_emplace(key_type &&k, Args &&... args) {
return m_ht.try_emplace(std::move(k), std::forward<Args>(args)...);
}
template <class... Args>
iterator try_emplace(const_iterator hint, const key_type &k,
Args &&... args) {
return m_ht.try_emplace_hint(hint, k, std::forward<Args>(args)...);
}
template <class... Args>
iterator try_emplace(const_iterator hint, key_type &&k, Args &&... args) {
return m_ht.try_emplace_hint(hint, std::move(k),
std::forward<Args>(args)...);
}
iterator erase(iterator pos) { return m_ht.erase(pos); }
iterator erase(const_iterator pos) { return m_ht.erase(pos); }
iterator erase(const_iterator first, const_iterator last) {
return m_ht.erase(first, last);
}
size_type erase(const key_type &key) { return m_ht.erase(key); }
/**
* Use the hash value 'precalculated_hash' instead of hashing the key. The
* hash value should be the same as hash_function()(key). Usefull to speed-up
* the lookup to the value if you already have the hash.
*/
size_type erase(const key_type &key, std::size_t precalculated_hash) {
return m_ht.erase(key, precalculated_hash);
}
/**
* This overload only participates in the overload resolution if the typedef
* KeyEqual::is_transparent exists. If so, K must be hashable and comparable
* to Key.
*/
template <
class K, class KE = KeyEqual,
typename std::enable_if<has_is_transparent<KE>::value>::type * = nullptr>
size_type erase(const K &key) {
return m_ht.erase(key);
}
/**
* @copydoc erase(const K& key)
*
* Use the hash value 'precalculated_hash' instead of hashing the key. The
* hash value should be the same as hash_function()(key). Usefull to speed-up
* the lookup to the value if you already have the hash.
*/
template <
class K, class KE = KeyEqual,
typename std::enable_if<has_is_transparent<KE>::value>::type * = nullptr>
size_type erase(const K &key, std::size_t precalculated_hash) {
return m_ht.erase(key, precalculated_hash);
}
void swap(robin_map &other) { other.m_ht.swap(m_ht); }
/*
* Lookup
*/
T &at(const Key &key) { return m_ht.at(key); }
/**
* Use the hash value 'precalculated_hash' instead of hashing the key. The
* hash value should be the same as hash_function()(key). Usefull to speed-up
* the lookup if you already have the hash.
*/
T &at(const Key &key, std::size_t precalculated_hash) {
return m_ht.at(key, precalculated_hash);
}
const T &at(const Key &key) const { return m_ht.at(key); }
/**
* @copydoc at(const Key& key, std::size_t precalculated_hash)
*/
const T &at(const Key &key, std::size_t precalculated_hash) const {
return m_ht.at(key, precalculated_hash);
}
/**
* This overload only participates in the overload resolution if the typedef
* KeyEqual::is_transparent exists. If so, K must be hashable and comparable
* to Key.
*/
template <
class K, class KE = KeyEqual,
typename std::enable_if<has_is_transparent<KE>::value>::type * = nullptr>
T &at(const K &key) {
return m_ht.at(key);
}
/**
* @copydoc at(const K& key)
*
* Use the hash value 'precalculated_hash' instead of hashing the key. The
* hash value should be the same as hash_function()(key). Usefull to speed-up
* the lookup if you already have the hash.
*/
template <
class K, class KE = KeyEqual,
typename std::enable_if<has_is_transparent<KE>::value>::type * = nullptr>
T &at(const K &key, std::size_t precalculated_hash) {
return m_ht.at(key, precalculated_hash);
}
/**
* @copydoc at(const K& key)
*/
template <
class K, class KE = KeyEqual,
typename std::enable_if<has_is_transparent<KE>::value>::type * = nullptr>
const T &at(const K &key) const {
return m_ht.at(key);
}
/**
* @copydoc at(const K& key, std::size_t precalculated_hash)
*/
template <
class K, class KE = KeyEqual,
typename std::enable_if<has_is_transparent<KE>::value>::type * = nullptr>
const T &at(const K &key, std::size_t precalculated_hash) const {
return m_ht.at(key, precalculated_hash);
}
T &operator[](const Key &key) { return m_ht[key]; }
T &operator[](Key &&key) { return m_ht[std::move(key)]; }
size_type count(const Key &key) const { return m_ht.count(key); }
/**
* Use the hash value 'precalculated_hash' instead of hashing the key. The
* hash value should be the same as hash_function()(key). Usefull to speed-up
* the lookup if you already have the hash.
*/
size_type count(const Key &key, std::size_t precalculated_hash) const {
return m_ht.count(key, precalculated_hash);
}
/**
* This overload only participates in the overload resolution if the typedef
* KeyEqual::is_transparent exists. If so, K must be hashable and comparable
* to Key.
*/
template <
class K, class KE = KeyEqual,
typename std::enable_if<has_is_transparent<KE>::value>::type * = nullptr>
size_type count(const K &key) const {
return m_ht.count(key);
}
/**
* @copydoc count(const K& key) const
*
* Use the hash value 'precalculated_hash' instead of hashing the key. The
* hash value should be the same as hash_function()(key). Usefull to speed-up
* the lookup if you already have the hash.
*/
template <
class K, class KE = KeyEqual,
typename std::enable_if<has_is_transparent<KE>::value>::type * = nullptr>
size_type count(const K &key, std::size_t precalculated_hash) const {
return m_ht.count(key, precalculated_hash);
}
iterator find(const Key &key) { return m_ht.find(key); }
/**
* Use the hash value 'precalculated_hash' instead of hashing the key. The
* hash value should be the same as hash_function()(key). Usefull to speed-up
* the lookup if you already have the hash.
*/
iterator find(const Key &key, std::size_t precalculated_hash) {
return m_ht.find(key, precalculated_hash);
}
const_iterator find(const Key &key) const { return m_ht.find(key); }
/**
* @copydoc find(const Key& key, std::size_t precalculated_hash)
*/
const_iterator find(const Key &key, std::size_t precalculated_hash) const {
return m_ht.find(key, precalculated_hash);
}
/**
* This overload only participates in the overload resolution if the typedef
* KeyEqual::is_transparent exists. If so, K must be hashable and comparable
* to Key.
*/
template <
class K, class KE = KeyEqual,
typename std::enable_if<has_is_transparent<KE>::value>::type * = nullptr>
iterator find(const K &key) {
return m_ht.find(key);
}
/**
* @copydoc find(const K& key)
*
* Use the hash value 'precalculated_hash' instead of hashing the key. The
* hash value should be the same as hash_function()(key). Usefull to speed-up
* the lookup if you already have the hash.
*/
template <
class K, class KE = KeyEqual,
typename std::enable_if<has_is_transparent<KE>::value>::type * = nullptr>
iterator find(const K &key, std::size_t precalculated_hash) {
return m_ht.find(key, precalculated_hash);
}
/**
* @copydoc find(const K& key)
*/
template <
class K, class KE = KeyEqual,
typename std::enable_if<has_is_transparent<KE>::value>::type * = nullptr>
const_iterator find(const K &key) const {
return m_ht.find(key);
}
/**
* @copydoc find(const K& key)
*
* Use the hash value 'precalculated_hash' instead of hashing the key. The
* hash value should be the same as hash_function()(key). Usefull to speed-up
* the lookup if you already have the hash.
*/
template <
class K, class KE = KeyEqual,
typename std::enable_if<has_is_transparent<KE>::value>::type * = nullptr>
const_iterator find(const K &key, std::size_t precalculated_hash) const {
return m_ht.find(key, precalculated_hash);
}
std::pair<iterator, iterator> equal_range(const Key &key) {
return m_ht.equal_range(key);
}
/**
* Use the hash value 'precalculated_hash' instead of hashing the key. The
* hash value should be the same as hash_function()(key). Usefull to speed-up
* the lookup if you already have the hash.
*/
std::pair<iterator, iterator> equal_range(const Key &key,
std::size_t precalculated_hash) {
return m_ht.equal_range(key, precalculated_hash);
}
std::pair<const_iterator, const_iterator> equal_range(const Key &key) const {
return m_ht.equal_range(key);
}
/**
* @copydoc equal_range(const Key& key, std::size_t precalculated_hash)
*/
std::pair<const_iterator, const_iterator>
equal_range(const Key &key, std::size_t precalculated_hash) const {
return m_ht.equal_range(key, precalculated_hash);
}
/**
* This overload only participates in the overload resolution if the typedef
* KeyEqual::is_transparent exists. If so, K must be hashable and comparable
* to Key.
*/
template <
class K, class KE = KeyEqual,
typename std::enable_if<has_is_transparent<KE>::value>::type * = nullptr>
std::pair<iterator, iterator> equal_range(const K &key) {
return m_ht.equal_range(key);
}
/**
* @copydoc equal_range(const K& key)
*
* Use the hash value 'precalculated_hash' instead of hashing the key. The
* hash value should be the same as hash_function()(key). Usefull to speed-up
* the lookup if you already have the hash.
*/
template <
class K, class KE = KeyEqual,
typename std::enable_if<has_is_transparent<KE>::value>::type * = nullptr>
std::pair<iterator, iterator> equal_range(const K &key,
std::size_t precalculated_hash) {
return m_ht.equal_range(key, precalculated_hash);
}
/**
* @copydoc equal_range(const K& key)
*/
template <
class K, class KE = KeyEqual,
typename std::enable_if<has_is_transparent<KE>::value>::type * = nullptr>
std::pair<const_iterator, const_iterator> equal_range(const K &key) const {
return m_ht.equal_range(key);
}
/**
* @copydoc equal_range(const K& key, std::size_t precalculated_hash)
*/
template <
class K, class KE = KeyEqual,
typename std::enable_if<has_is_transparent<KE>::value>::type * = nullptr>
std::pair<const_iterator, const_iterator>
equal_range(const K &key, std::size_t precalculated_hash) const {
return m_ht.equal_range(key, precalculated_hash);
}
/*
* Bucket interface
*/
size_type bucket_count() const { return m_ht.bucket_count(); }
size_type max_bucket_count() const { return m_ht.max_bucket_count(); }
/*
* Hash policy
*/
float load_factor() const { return m_ht.load_factor(); }
float min_load_factor() const { return m_ht.min_load_factor(); }
float max_load_factor() const { return m_ht.max_load_factor(); }
/**
* Set the `min_load_factor` to `ml`. When the `load_factor` of the map goes
* below `min_load_factor` after some erase operations, the map will be
* shrunk when an insertion occurs. The erase method itself never shrinks
* the map.
*
* The default value of `min_load_factor` is 0.0f, the map never shrinks by
* default.
*/
void min_load_factor(float ml) { m_ht.min_load_factor(ml); }
void max_load_factor(float ml) { m_ht.max_load_factor(ml); }
void rehash(size_type count) { m_ht.rehash(count); }
void reserve(size_type count) { m_ht.reserve(count); }
/*
* Observers
*/
hasher hash_function() const { return m_ht.hash_function(); }
key_equal key_eq() const { return m_ht.key_eq(); }
/*
* Other
*/
/**
* Convert a const_iterator to an iterator.
*/
iterator mutable_iterator(const_iterator pos) {
return m_ht.mutable_iterator(pos);
}
friend bool operator==(const robin_map &lhs, const robin_map &rhs) {
if (lhs.size() != rhs.size()) {
return false;
}
for (const auto &element_lhs : lhs) {
const auto it_element_rhs = rhs.find(element_lhs.first);
if (it_element_rhs == rhs.cend() ||
element_lhs.second != it_element_rhs->second) {
return false;
}
}
return true;
}
friend bool operator!=(const robin_map &lhs, const robin_map &rhs) {
return !operator==(lhs, rhs);
}
friend void swap(robin_map &lhs, robin_map &rhs) { lhs.swap(rhs); }
private:
ht m_ht;
};
/**
* Same as `tsl::robin_map<Key, T, Hash, KeyEqual, Allocator, StoreHash,
* tsl::rh::prime_growth_policy>`.
*/
template <class Key, class T, class Hash = std::hash<Key>,
class KeyEqual = std::equal_to<Key>,
class Allocator = std::allocator<std::pair<Key, T>>,
bool StoreHash = false>
using robin_pg_map = robin_map<Key, T, Hash, KeyEqual, Allocator, StoreHash,
tsl::rh::prime_growth_policy>;
} // end namespace tsl
#endif
// Copyright 2019-2020 Yan Yan
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <chrono>
#ifdef TV_CUDA
#include <cuda_runtime_api.h>
#endif
#include <iostream>
namespace spconv {
#ifdef TV_CUDA
/**
 * Stopwatch that calls `cudaDeviceSynchronize()` before every clock sample.
 *
 * `TimeT` is the `std::chrono` duration type used for the reported values
 * (microseconds by default). Time is taken from `std::chrono::steady_clock`.
 */
template <typename TimeT = std::chrono::microseconds> struct CudaContextTimer {
  /** Synchronize the device, then start the stopwatch. */
  CudaContextTimer() {
    cudaDeviceSynchronize();
    mCurTime = std::chrono::steady_clock::now();
  }
  /**
   * Synchronize the device, return the time elapsed since construction or the
   * previous call (in `TimeT` ticks), and restart the stopwatch.
   */
  typename TimeT::rep report() {
    cudaDeviceSynchronize();
    // Sample the clock once and reuse it as the new start point. Taking a
    // second sample for the restart would silently drop the time between the
    // two samples from consecutive measurements.
    const auto now = std::chrono::steady_clock::now();
    const auto res = std::chrono::duration_cast<TimeT>(now - mCurTime).count();
    mCurTime = now;
    return res;
  }

private:
  // Start point of the interval currently being measured.
  std::chrono::time_point<std::chrono::steady_clock> mCurTime;
};
#endif
/**
 * Stopwatch based on `std::chrono::steady_clock`.
 *
 * `TimeT` is the `std::chrono` duration type used for the reported values
 * (microseconds by default).
 */
template <typename TimeT = std::chrono::microseconds> struct CPUTimer {
  /** Start the stopwatch. */
  CPUTimer() : mCurTime(std::chrono::steady_clock::now()) {}
  /**
   * Return the time elapsed since construction or the previous call (in
   * `TimeT` ticks) and restart the stopwatch.
   */
  typename TimeT::rep report() {
    // Sample the clock once and reuse it as the new start point. Taking a
    // second sample for the restart would silently drop the time between the
    // two samples from consecutive measurements.
    const auto now = std::chrono::steady_clock::now();
    const auto res = std::chrono::duration_cast<TimeT>(now - mCurTime).count();
    mCurTime = now;
    return res;
  }

private:
  // Start point of the interval currently being measured.
  std::chrono::time_point<std::chrono::steady_clock> mCurTime;
};
} // namespace spconv
[build-system]
requires = ["setuptools>=41.0", "wheel", "pccm>=0.2.5", "cumm>=0.1.3"]
build-backend = "setuptools.build_meta"
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# Note: To use the 'upload' functionality of this file, you must:
# $ pip install twine
import io
import os
import platform
import re
import subprocess
import shutil
import sys
from distutils.version import LooseVersion
from pathlib import Path
from shutil import rmtree
from typing import List
import pccm
from pccm.extension import ExtCallback, PCCMBuild, PCCMExtension
from setuptools import Command, find_packages, setup
from setuptools.extension import Extension
# Package meta-data.
NAME = 'spconv'
RELEASE_NAME = NAME
deps = ["cumm"]
# The script originally read only "CUMM_CUDA_VERSON", which looks like a
# misspelling of "CUMM_CUDA_VERSION" (TODO confirm against the cumm build
# scripts). Read the correctly spelled variable first and keep the old name
# as a fallback so existing build environments continue to work.
cuda_ver = (os.environ.get("CUMM_CUDA_VERSION")
            or os.environ.get("CUMM_CUDA_VERSON", ""))
if cuda_ver:
    # A CUDA version selects a CUDA-specific release name and cumm dependency.
    cuda_ver = cuda_ver.replace(".", "")  # 10.2 to 102
    RELEASE_NAME += "-cu{}".format(cuda_ver)
    deps = ["cumm-cu{}".format(cuda_ver)]
DESCRIPTION = 'spatial sparse convolution'
URL = 'https://github.com/traveller59/spconv'
EMAIL = 'yanyan.sub@outlook.com'
AUTHOR = 'Yan Yan'
REQUIRES_PYTHON = '>=3.7'
VERSION = None
# What packages are required for this module to be executed?
REQUIRED = ["pccm>=0.2.5", "pybind11>=2.6.0", "fire", "numpy", *deps]
# What packages are optional?
EXTRAS = {
# 'fancy feature': ['django'],
}
# The rest you shouldn't have to touch too much :)
# ------------------------------------------------
# Except, perhaps the License and Trove Classifiers!
# If you do change the License, remember to change the Trove Classifier for that!
here = os.path.abspath(os.path.dirname(__file__))
sys.path.append(str(Path(__file__).parent))
# Import the README and use it as the long-description.
# Note: this will only work if 'README.md' is present in your MANIFEST.in file!
try:
with io.open(os.path.join(here, 'README.md'), encoding='utf-8') as f:
long_description = '\n' + f.read()
except FileNotFoundError:
long_description = DESCRIPTION
# Load the package's __version__.py module as a dictionary.
about = {}
if not VERSION:
with open('version.txt', 'r') as f:
version = f.read().strip()
else:
version = VERSION
cwd = os.path.dirname(os.path.abspath(__file__))
import torch
from setuptools import Extension, find_packages, setup
from setuptools.command.build_ext import build_ext
def _convert_build_number(build_number):
parts = build_number.split(".")
if len(parts) == 2:
return "{}{:03d}".format(int(parts[0]), int(parts[1]))
elif len(parts) == 1:
return build_number
else:
raise NotImplementedError
# if 'LIBTORCH_ROOT' not in os.environ:
# raise ValueError("You must set LIBTORCH_ROOT to your torch c++ library.")
LIBTORCH_ROOT = str(Path(torch.__file__).parent)
env_suffix = os.environ.get("SPCONV_VERSION_SUFFIX", "")
if env_suffix != "":
version += ".dev{}".format(_convert_build_number(env_suffix))
version_path = os.path.join(cwd, NAME, '__version__.py')
about['__version__'] = version
SPCONV_FORCE_BUILD_CUDA = os.getenv("SPCONV_FORCE_BUILD_CUDA")
with open(version_path, 'w') as f:
f.write("__version__ = '{}'\n".format(version))
PYTHON_VERSION = "{}.{}".format(sys.version_info.major, sys.version_info.minor)
class UploadCommand(Command):
"""Support setup.py upload."""
remove_plus = torch.__version__.find("+")
PYTORCH_VERSION = torch.__version__
if remove_plus != -1:
PYTORCH_VERSION = torch.__version__[:remove_plus]
PYTORCH_VERSION = list(map(int, PYTORCH_VERSION.split(".")))
PYTORCH_VERSION_NUMBER = PYTORCH_VERSION[0] * 10000 + PYTORCH_VERSION[1] * 100 + PYTORCH_VERSION[2]
description = 'Build and publish the package.'
user_options = []
class CMakeExtension(Extension):
    """Extension with no Python-side sources that records a source directory.

    Args:
        name: extension name passed to ``Extension``.
        sourcedir: source directory; stored as an absolute path in
            ``self.sourcedir``.
        library_dirs: optional list of library directories passed to
            ``Extension``; ``None`` (the default) means an empty list.
    """

    def __init__(self, name, sourcedir='', library_dirs=None):
        # A ``None`` default avoids sharing one mutable list between all
        # instances created without an explicit ``library_dirs``.
        if library_dirs is None:
            library_dirs = []
        Extension.__init__(self, name, sources=[], library_dirs=library_dirs)
        self.sourcedir = os.path.abspath(sourcedir)
@staticmethod
def status(s):
"""Prints things in bold."""
print('\033[1m{0}\033[0m'.format(s))
def initialize_options(self):
pass
def finalize_options(self):
pass
class CMakeBuild(build_ext):
def run(self):
try:
out = subprocess.check_output(['cmake', '--version'])
self.status('Removing previous builds...')
rmtree(os.path.join(here, 'dist'))
except OSError:
raise RuntimeError("CMake must be installed to build the following extensions: " +
", ".join(e.name for e in self.extensions))
if platform.system() == "Windows":
cmake_version = LooseVersion(re.search(r'version\s*([\d.]+)', out.decode()).group(1))
if cmake_version < '3.13.0':
raise RuntimeError("CMake >= 3.13.0 is required on Windows")
for ext in self.extensions:
self.build_extension(ext)
# Configure and build one extension by invoking ``cmake`` twice in
# ``self.build_temp``: once to configure ``ext.sourcedir`` and once with
# ``--build``.
def build_extension(self, ext):
# Directory in which setuptools expects the built extension to appear.
extdir = os.path.abspath(os.path.dirname(self.get_ext_fullpath(ext.name)))
cmake_args = [# '-G "Visual Studio 15 2017 Win64"',
'-DCMAKE_PREFIX_PATH={}'.format(LIBTORCH_ROOT),
'-DPYBIND11_PYTHON_VERSION={}'.format(PYTHON_VERSION),
'-DSPCONV_BuildTests=OFF',
'-DPYTORCH_VERSION={}'.format(PYTORCH_VERSION_NUMBER),
] # -arch=sm_61
# CUDA is switched off only when torch reports no CUDA and the
# SPCONV_FORCE_BUILD_CUDA environment variable is unset.
if not torch.cuda.is_available() and SPCONV_FORCE_BUILD_CUDA is None:
cmake_args += ['-DSPCONV_BuildCUDA=OFF']
else:
cuda_flags = ["\"--expt-relaxed-constexpr\""]
# must add following flags to use at::Half
# but will remove raw half operators.
cuda_flags += ["-D__CUDA_NO_HALF_OPERATORS__", "-D__CUDA_NO_HALF_CONVERSIONS__"]
# cuda_flags += ["-D__CUDA_NO_HALF2_OPERATORS__"]
cmake_args += ['-DCMAKE_CUDA_FLAGS=' + " ".join(cuda_flags)]
cfg = 'Debug' if self.debug else 'Release'
# Debug builds are rejected by the assertion below.
assert cfg == "Release", "pytorch ops don't support debug build."
build_args = ['--config', cfg]
print(cfg)
# Platform-specific output directories and build-tool arguments; both
# variants place the built library in the "spconv" directory under extdir.
if platform.system() == "Windows":
cmake_args += ['-DCMAKE_BUILD_TYPE=' + cfg]
cmake_args += ['-DCMAKE_LIBRARY_OUTPUT_DIRECTORY_{}={}'.format(cfg.upper(), str(Path(extdir) / "spconv"))]
# cmake_args += ['-DCMAKE_ARCHIVE_OUTPUT_DIRECTORY_{}={}'.format(cfg.upper(), str(Path(extdir) / "spconv"))]
cmake_args += ['-DCMAKE_RUNTIME_OUTPUT_DIRECTORY_{}={}'.format(cfg.upper(), str(Path(extdir) / "spconv"))]
cmake_args += ["-DCMAKE_WINDOWS_EXPORT_ALL_SYMBOLS=TRUE"]
# sys.maxsize above 2**32 indicates a 64-bit Python interpreter.
if sys.maxsize > 2**32:
cmake_args += ['-A', 'x64']
build_args += ['--', '/m']
else:
cmake_args += ['-DCMAKE_LIBRARY_OUTPUT_DIRECTORY={}'.format(str(Path(extdir) / "spconv"))]
cmake_args += ['-DCMAKE_BUILD_TYPE=' + cfg]
build_args += ['--', '-j4']
# Append a VERSION_INFO define to CXXFLAGS for the configure step only; the
# build step below runs without this modified environment.
env = os.environ.copy()
env['CXXFLAGS'] = '{} -DVERSION_INFO=\\"{}\\"'.format(env.get('CXXFLAGS', ''),
self.distribution.get_version())
if not os.path.exists(self.build_temp):
os.makedirs(self.build_temp)
print("|||||CMAKE ARGS|||||", cmake_args)
subprocess.check_call(['cmake', ext.sourcedir] + cmake_args, cwd=self.build_temp, env=env)
subprocess.check_call(['cmake', '--build', '.'] + build_args, cwd=self.build_temp)
packages = find_packages(exclude=('tools', 'tools.*'))
pass
self.status('Building Source and Wheel (universal) distribution...')
os.system('{0} setup.py sdist bdist_wheel --universal'.format(
sys.executable))
self.status('Uploading the package to PyPI via Twine...')
os.system('twine upload dist/*')
self.status('Pushing git tags...')
os.system('git tag v{0}'.format(about['__version__']))
os.system('git push --tags')
sys.exit()
# Select the setup() command classes and extension modules. The compiled
# extension and its build command are added only when the environment variable
# SPCONV_DISABLE_JIT is exactly "1"; otherwise only 'upload' is registered and
# no extension is built.
disable_jit = os.getenv("SPCONV_DISABLE_JIT", None)
cmdclass = {'upload': UploadCommand}
ext_modules: List[Extension] = []
if disable_jit == "1":
    cmdclass['build_ext'] = PCCMBuild
    # Imported here so they are only required when the extension is built.
    from cumm.gemm.main import GemmMainUnitTest, SHUFFLE_SIMT_PARAMS, SHUFFLE_VOLTA_PARAMS, SHUFFLE_TURING_PARAMS
    from spconv.csrc.sparse.all import SpconvOps
    from cumm.gemm.gather import GatherAll
    cu = GemmMainUnitTest(SHUFFLE_SIMT_PARAMS + SHUFFLE_VOLTA_PARAMS + SHUFFLE_TURING_PARAMS)
    cu.namespace = "cumm.gemm.main"
    ext_modules.append(
        PCCMExtension([cu, SpconvOps(), GatherAll()],
                      "spconv/core_cc",
                      Path(__file__).resolve().parent / "spconv"))
# Where the magic happens:
setup(
name='spconv',
version='1.2.1',
author='Yan Yan',
author_email='scrin@foxmail.com',
description='spatial sparse convolution for pytorch',
long_description='',
setup_requires = ['torch>=1.3.0'],
packages=packages,
package_dir = {'spconv': 'spconv'},
ext_modules=[CMakeExtension('spconv', library_dirs=[])],
cmdclass=dict(build_ext=CMakeBuild),
zip_safe=False,
name=RELEASE_NAME,
version=about['__version__'],
description=DESCRIPTION,
long_description=long_description,
long_description_content_type='text/markdown',
author=AUTHOR,
author_email=EMAIL,
python_requires=REQUIRES_PYTHON,
url=URL,
packages=find_packages(exclude=('tests', )),
# If your package is a single module, use this instead of 'packages':
# py_modules=['mypackage'],
entry_points={
'console_scripts': [],
},
install_requires=REQUIRED,
extras_require=EXTRAS,
include_package_data=True,
license='MIT',
classifiers=[
# Trove classifiers
# Full list: https://pypi.python.org/pypi?%3Aaction=list_classifiers
'License :: OSI Approved :: MIT License',
'Programming Language :: Python',
'Programming Language :: Python :: 3',
'Programming Language :: Python :: Implementation :: CPython',
'Programming Language :: Python :: Implementation :: PyPy'
],
# $ setup.py publish support.
cmdclass=cmdclass,
ext_modules=ext_modules,
)
# Copyright 2019-2020 Yan Yan
#
# Copyright 2021 Yan Yan
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#
# http://www.apache.org/licenses/LICENSE-2.0
#
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import platform
from pathlib import Path
from . import build as _build
import numpy as np
import torch
from spconv import ops, utils
from spconv.conv import (SparseConv2d, SparseConv3d, SparseConvTranspose2d,
SparseConvTranspose3d, SparseInverseConv2d,
SparseInverseConv3d, SubMConv2d, SubMConv3d)
from spconv.core import SparseConvTensor
from spconv.identity import Identity
from spconv.modules import SparseModule, SparseSequential
from spconv.ops import ConvAlgo
from spconv.pool import SparseMaxPool2d, SparseMaxPool3d
from spconv.tables import AddTable, ConcatTable, JoinTable
_LIB_FILE_NAME = "libspconv.so"
if platform.system() == "Windows":
_LIB_FILE_NAME = "spconv.dll"
_LIB_PATH = str(Path(__file__).parent / _LIB_FILE_NAME)
torch.ops.load_library(_LIB_PATH)
class ToDense(SparseModule):
    """Sparse module whose output is the dense form of its input.

    The original note describes the result as an NCHW dense tensor; the layout
    is whatever ``SparseConvTensor.dense()`` produces.
    """

    def forward(self, x: SparseConvTensor):
        dense_tensor = x.dense()
        return dense_tensor
class RemoveGrid(SparseModule):
    """Sparse module that drops the grid buffer attached to its input.

    The input tensor is modified in place and returned.
    """

    def forward(self, x: SparseConvTensor):
        # Clearing the attribute releases the reference to the buffer
        # (described in the original note as pre-allocated).
        x.grid = None
        return x
from .algo import ConvAlgo
from . import utils, constants
\ No newline at end of file
# Copyright 2021 Yan Yan
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from enum import Enum
from cumm import tensorview as tv
from typing import Dict, List, Set, Tuple
from spconv.core_cc.cumm.gemm.main import GemmAlgoDesp, GemmMainUnitTest, GemmParams
# from spconv.core_cc.cumm.gemm.gather import GatherAll, ScatterAll
from cumm.gemm.algospec.core import ShuffleStrideType, get_min_arch_of_algo_str, get_available_algo_str_from_arch
from cumm.gemm.codeops import group_by, div_up
from typing import Optional
import time
import numpy as np
class ConvAlgo(Enum):
    """Names of the available convolution algorithms.

    Each member's value is the member's own name as a string.
    """
    Native = "Native"
    MaskImplicitGemm = "MaskImplicitGemm"
    MaskSplitImplicitGemm = "MaskSplitImplicitGemm"
class AlgoHint(Enum):
    """Hint values for algorithm selection.

    Every non-zero value occupies its own bit. NOTE(review): this presumably
    allows hints to be OR-ed together as plain integers; confirm at call sites.
    """
    NoHint = 0b000
    # Historical misspelling, kept as the canonical member so existing callers,
    # ``.name`` and iteration order are unchanged.
    Fowrard = 0b001
    # Correctly spelled alias: same value, so ``Forward is Fowrard``.
    Forward = 0b001
    BackwardInput = 0b010
    BackwardWeight = 0b100
ALL_ALGO_DESPS = GemmMainUnitTest.get_all_algo_desp()
_GEMM_STATIC_KEY = Tuple[bool, bool, bool, int, int, int, str, str]
# GATHER = GatherAll()
# SCATTER = ScatterAll()
class SimpleGemmAlgoMeta:
    """Plain container for tile-size lists and a tile-key lookup table.

    Args:
        tile_ms: list of integer tile sizes, stored as ``self.tile_ms``.
        tile_ns: list of integer tile sizes, stored as ``self.tile_ns``.
        tile_ks: list of integer tile sizes, stored as ``self.tile_ks``.
        tile_shape_to_algos: mapping from an integer key to a list of integer
            indices, stored as ``self.tile_shape_to_algos``.

    The arguments are stored by reference; nothing is copied or validated.
    """

    def __init__(self, tile_ms: List[int], tile_ns: List[int],
                 tile_ks: List[int],
                 tile_shape_to_algos: Dict[int, List[int]]) -> None:
        self.tile_shape_to_algos = tile_shape_to_algos
        self.tile_ms, self.tile_ns, self.tile_ks = tile_ms, tile_ns, tile_ks
class BestAlgoByProfile:
    """Plain record describing one profiled kernel configuration.

    Args:
        algo_desp: descriptor of the profiled kernel.
        external_gather: stored as given.
        external_scatter: stored as given.
        gather_params: optional 4-int tuple, stored as given.
        scatter_params: optional 4-int tuple, stored as given.
        splitk: split-k slice count used for this entry (default 1).
    """
    def __init__(self,
                 algo_desp: GemmAlgoDesp,
                 external_gather: bool,
                 external_scatter: bool,
                 gather_params: Optional[Tuple[int, int, int, int]] = None,
                 scatter_params: Optional[Tuple[int, int, int, int]] = None,
                 splitk: int = 1) -> None:
        self.algo_desp = algo_desp
        self.splitk = splitk
        # Gather-side settings.
        self.external_gather = external_gather
        self.gather_params = gather_params
        # Scatter-side settings.
        self.external_scatter = external_scatter
        self.scatter_params = scatter_params
class SimpleGemm:
    """Select, profile and launch GEMM kernels described by ``GemmAlgoDesp``.

    Descriptors are grouped by a static key (see ``get_static_key``). For
    each group the distinct tile sizes and a tile-key -> descriptor-index
    table are precomputed so that ``select`` can pass them to
    ``GemmMainUnitTest.simple_select_tile_shape``. Profiling results are
    cached per problem size in three dictionaries, one per ``AlgoHint`` pass.
    """
    def __init__(self, desps: List[GemmAlgoDesp]) -> None:
        """Group ``desps`` by static key and build per-group tile metadata.

        Args:
            desps: every kernel descriptor this selector may choose from.
        """
        self.desps = desps
        self.static_key_to_desps = group_by(self.get_static_key, desps)
        self.static_key_to_meta: Dict[_GEMM_STATIC_KEY,
                                      SimpleGemmAlgoMeta] = {}
        for k, static_desps in self.static_key_to_desps.items():
            tile_shape_to_algos: Dict[int, List[int]] = {}
            tile_ms: Set[int] = set()
            tile_ns: Set[int] = set()
            tile_ks: Set[int] = set()
            for i, desp in enumerate(static_desps):
                ts = desp.tile_shape
                tile_ms.add(ts[0])
                tile_ns.add(ts[1])
                tile_ks.add(ts[2])
                # Pack the three tile sizes into one integer key. The fields
                # are 20 bits apart, so this assumes each size < 2**20.
                tile_key = ts[0] | (ts[1] << 20) | (ts[2] << 40)
                tile_shape_to_algos.setdefault(tile_key, []).append(i)
            self.static_key_to_meta[k] = SimpleGemmAlgoMeta(
                sorted(tile_ms), sorted(tile_ns), sorted(tile_ks),
                tile_shape_to_algos)
        self.nk_forward_cache: Dict[Tuple[int, int],
                                    BestAlgoByProfile] = {}  # for forward
        self.nk_dgrad_cache: Dict[Tuple[int, int],
                                  BestAlgoByProfile] = {}  # for backward input
        self.mn_cache: Dict[Tuple[int, int],
                            BestAlgoByProfile] = {}  # for backward weight

    @staticmethod
    def get_static_key(d: GemmAlgoDesp) -> _GEMM_STATIC_KEY:
        """Return the shape-independent part of a descriptor as a tuple."""
        return (d.trans_a, d.trans_b, d.trans_c, d.dtype_a, d.dtype_b,
                d.dtype_c, d.shuffle_type, d.algo)

    @staticmethod
    def _fold_trans_c(a: tv.Tensor, b: tv.Tensor, trans_a: bool,
                      trans_b: bool, trans_c: bool):
        """Rewrite a transposed-output problem as a ``trans_c == False`` one.

        When ``trans_c`` is set the operands are swapped and both transpose
        flags are negated and swapped; otherwise everything is returned
        unchanged.

        Returns:
            ``(a, b, trans_a, trans_b, trans_c)`` after folding.
        """
        if trans_c:
            return b, a, not trans_b, not trans_a, False
        return a, b, trans_a, trans_b, trans_c

    def _select_cache(
        self, hint: int, m: int, n: int, k: int
    ) -> Tuple[Dict[Tuple[int, int], BestAlgoByProfile], Tuple[int, int]]:
        """Map ``hint`` to the profile cache and cache key it uses.

        BackwardWeight is keyed by ``(m, n)``; BackwardInput and forward are
        keyed by ``(n, k)``. The flags are tested in that order.

        Raises:
            NotImplementedError: ``hint`` contains none of the three flags.
        """
        if hint & AlgoHint.BackwardWeight.value:
            return self.mn_cache, (m, n)
        if hint & AlgoHint.BackwardInput.value:
            return self.nk_dgrad_cache, (n, k)
        if hint & AlgoHint.Fowrard.value:
            return self.nk_forward_cache, (n, k)
        raise NotImplementedError

    def device_synchronize(self):
        """Forward to ``GemmMainUnitTest.device_synchronize``."""
        return GemmMainUnitTest.device_synchronize()

    def get_all_available(
            self,
            a: tv.Tensor,
            b: tv.Tensor,
            c: tv.Tensor,
            trans_a: bool,
            trans_b: bool,
            trans_c: bool,
            arch: Tuple[int, int],
            shuffle_type: ShuffleStrideType = ShuffleStrideType.NoShuffle):
        """List every descriptor usable for the given operands on ``arch``.

        A descriptor qualifies when its static key matches the (trans_c
        folded) problem and ``supported_ldx`` accepts ``dim(1)`` of a, b and c.

        Returns:
            List of matching ``GemmAlgoDesp`` (possibly empty).
        """
        a, b, trans_a, trans_b, trans_c = self._fold_trans_c(
            a, b, trans_a, trans_b, trans_c)
        finally_algos: List[GemmAlgoDesp] = []
        for algo in get_available_algo_str_from_arch(arch):
            static_key = (trans_a, trans_b, trans_c, a.dtype, b.dtype, c.dtype,
                          shuffle_type.value, algo)
            desps = self.static_key_to_desps.get(static_key, None)
            if not desps:
                continue
            for desp in desps:
                if desp.supported_ldx(a.dim(1), b.dim(1), c.dim(1)):
                    finally_algos.append(desp)
        return finally_algos

    def select(self,
               a: tv.Tensor,
               b: tv.Tensor,
               c: tv.Tensor,
               trans_a: bool,
               trans_b: bool,
               trans_c: bool,
               arch: Tuple[int, int],
               shuffle_type: ShuffleStrideType = ShuffleStrideType.NoShuffle,
               a_inds: tv.Tensor = tv.Tensor(),
               b_inds: tv.Tensor = tv.Tensor(),
               c_inds: tv.Tensor = tv.Tensor(),
               hint: int = AlgoHint.NoHint.value):
        """Heuristically pick one descriptor without running any kernel.

        Algorithm families available on ``arch`` are visited in the order
        returned by ``get_available_algo_str_from_arch``; the first family
        that yields an acceptable descriptor wins.

        Args:
            a, b, c: GEMM operands; only dtype and shape are read here.
            trans_a, trans_b, trans_c: transpose flags of the operands.
            arch: architecture pair passed to
                ``get_available_algo_str_from_arch``.
            shuffle_type: shuffle-stride mode of the problem.
            a_inds, b_inds, c_inds: index tensors; only shapes are read.
            hint: bitwise OR of ``AlgoHint`` values.

        Returns:
            The chosen ``GemmAlgoDesp``, or ``None`` when nothing matches.
        """
        m, n, k = GemmMainUnitTest.extract_mnk(a.shape, b.shape,
                                               trans_a, trans_b, trans_c,
                                               shuffle_type.value,
                                               a_inds.shape, b_inds.shape,
                                               c_inds.shape)
        a, b, trans_a, trans_b, trans_c = self._fold_trans_c(
            a, b, trans_a, trans_b, trans_c)
        fwd_or_dgrad = AlgoHint.Fowrard.value | AlgoHint.BackwardInput.value
        for algo in get_available_algo_str_from_arch(arch):
            static_key = (trans_a, trans_b, trans_c, a.dtype, b.dtype, c.dtype,
                          shuffle_type.value, algo)
            desps = self.static_key_to_desps.get(static_key, None)
            if not desps:
                continue
            meta = self.static_key_to_meta[static_key]
            # for shuffle stride algos, we need to make channel tile size as large as possible.
            # so if ShuffleAC, we need to make k largest.
            selected_algo_desps = GemmMainUnitTest.simple_select_tile_shape(
                m,
                n,
                k,
                meta.tile_ms,
                meta.tile_ns,
                meta.tile_ks,
                meta.tile_shape_to_algos,
                large_k_first=shuffle_type == ShuffleStrideType.ShuffleAC)
            if not selected_algo_desps:
                candidate = desps
            else:
                candidate = [desps[i] for i in selected_algo_desps]
            # select by hint
            if hint == AlgoHint.NoHint.value:
                return candidate[0]
            if hint & fwd_or_dgrad:
                # m may be huge, n and k are small
                # don't need mixed precision
                # don't need splitk
                if a.dtype == tv.float16:
                    finally_algos = [
                        can for can in candidate
                        if can.dacc == tv.float16 and can.dcomp == tv.float16
                    ]
                else:
                    finally_algos = candidate
            elif hint & AlgoHint.BackwardWeight.value:
                # k is huge
                # don't support i8
                # if f16, acc and comp must be f32
                if a.dtype == tv.int8:
                    continue
                # Prefer serial split-k kernels, fall back to all candidates.
                candidate_filtered: List[GemmAlgoDesp] = [
                    can for can in candidate if can.split_k_serial
                ] or candidate
                if a.dtype == tv.float16:
                    finally_algos = [
                        can for can in candidate_filtered
                        if can.dacc == tv.float32 and can.dcomp == tv.float32
                    ]
                else:
                    finally_algos = candidate_filtered
            else:
                # Non-zero hint without any known flag: take the first one.
                return candidate[0]
            # print(finally_algos)
            if finally_algos:
                return finally_algos[0]
        return None

    def get_profiled_algo(
            self,
            a_shape: List[int],
            b_shape: List[int],
            c_shape: List[int],
            trans_a: bool,
            trans_b: bool,
            trans_c: bool,
            arch: Tuple[int, int],
            shuffle_type: ShuffleStrideType = ShuffleStrideType.NoShuffle,
            a_inds_shape: Optional[List[int]] = None,
            b_inds_shape: Optional[List[int]] = None,
            c_inds_shape: Optional[List[int]] = None,
            hint: int = AlgoHint.NoHint.value):
        """Look up a cached profiling result for this problem size.

        ``c_shape`` and ``arch`` are accepted but not used for the lookup.

        Returns:
            The cached ``BestAlgoByProfile`` or ``None`` when not profiled.

        Raises:
            NotImplementedError: ``hint`` contains none of the pass flags.
        """
        m, n, k = self.extract_mnk(a_shape, b_shape, trans_a, trans_b,
                                   trans_c, arch, shuffle_type, a_inds_shape,
                                   b_inds_shape, c_inds_shape, hint)
        cache, key = self._select_cache(hint, m, n, k)
        return cache.get(key, None)

    def extract_mnk(
            self,
            a_shape: List[int],
            b_shape: List[int],
            trans_a: bool,
            trans_b: bool,
            trans_c: bool,
            arch: Tuple[int, int],
            shuffle_type: ShuffleStrideType = ShuffleStrideType.NoShuffle,
            a_inds_shape: Optional[List[int]] = None,
            b_inds_shape: Optional[List[int]] = None,
            c_inds_shape: Optional[List[int]] = None,
            hint: int = AlgoHint.NoHint.value):
        """Compute ``(m, n, k)`` via ``GemmMainUnitTest.extract_mnk``.

        Missing index shapes are replaced by empty lists. ``arch`` and
        ``hint`` are accepted but not used.
        """
        if a_inds_shape is None:
            a_inds_shape = []
        if b_inds_shape is None:
            b_inds_shape = []
        if c_inds_shape is None:
            c_inds_shape = []
        m, n, k = GemmMainUnitTest.extract_mnk(a_shape, b_shape,
                                               trans_a, trans_b, trans_c,
                                               shuffle_type.value,
                                               a_inds_shape, b_inds_shape,
                                               c_inds_shape)
        return m, n, k

    def profile_and_cache(
            self,
            a: tv.Tensor,
            b: tv.Tensor,
            c: tv.Tensor,
            trans_a: bool,
            trans_b: bool,
            trans_c: bool,
            arch: Tuple[int, int],
            shuffle_type: ShuffleStrideType = ShuffleStrideType.NoShuffle,
            a_inds: tv.Tensor = tv.Tensor(),
            b_inds: tv.Tensor = tv.Tensor(),
            c_inds: tv.Tensor = tv.Tensor(),
            hint: int = AlgoHint.NoHint.value,
            alpha: float = 1.0,
            beta: float = 0.0,
            gather_data: tv.Tensor = tv.Tensor(),
            scatter_data: tv.Tensor = tv.Tensor(),
            # mm_func
            stream: int = 0):
        """Time every available kernel and cache the fastest one.

        Each kernel (and, for serial split-k kernels under BackwardWeight,
        each split-k factor in 1..64) is launched three times on a clone of
        ``c``; the first launch is treated as warm-up and the mean of the
        remaining two is the score. ``gather_data`` and ``scatter_data`` are
        currently unused because external gather/scatter profiling is
        commented out.

        Returns:
            ``(best, seconds)``: the winning ``BestAlgoByProfile`` and its
            mean wall-clock time.

        Raises:
            NotImplementedError: ``hint`` contains none of the pass flags.
            IndexError: no kernel is available for these operands.
        """
        m, n, k = GemmMainUnitTest.extract_mnk(a.shape, b.shape,
                                               trans_a, trans_b, trans_c,
                                               shuffle_type.value,
                                               a_inds.shape, b_inds.shape,
                                               c_inds.shape)
        # Resolve the target cache first so an unsupported hint fails before
        # any kernel is launched instead of after all the profiling work.
        cache, key = self._select_cache(hint, m, n, k)
        avail = self.get_all_available(a, b, c, trans_a, trans_b, trans_c,
                                       arch, shuffle_type)
        c_ = c.clone()
        times: List[float] = []
        # gather_algos: List[GemmAlgoDesp] = []
        # find fastest gather algo for this input
        best_gather_params = (-1, -1, -1, -1)
        best_scatter_params = (-1, -1, -1, -1)
        # gather_data_ = tv.Tensor()
        # if not gather_data.empty(
        # ) and not hint & AlgoHint.BackwardWeight.value:
        #     # run gather here
        #     all_gather_params = GATHER.get_all_gather_params()
        #     gather_data_ = gather_data.clone()
        #     gather_times: List[float] = []
        #     for gather_params in all_gather_params:
        #         if GATHER.supported(gather_params[2], a.dim(1), a.dtype):
        #             this_times = []
        #             for j in range(10):
        #                 GemmMainUnitTest.stream_synchronize(stream)
        #                 t = time.time()
        #                 GATHER.gather(gather_data_, a, a_inds, *gather_params)
        #                 GemmMainUnitTest.stream_synchronize(stream)
        #                 this_times.append(time.time() - t)
        #             gather_times.append(np.mean(this_times[5:]))
        #     min_time = 1000
        #     min_idx = -1
        #     for i, t in enumerate(gather_times):
        #         if t < min_time:
        #             min_time = t
        #             min_idx = i
        #     best_gather_params = all_gather_params[min_idx]
        # if not scatter_data.empty(
        # ) and not hint & AlgoHint.BackwardWeight.value:
        #     # run gather here
        #     all_scatter_params = SCATTER.get_all_scatter_params()
        #     scatter_data_ = scatter_data.clone()
        #     scatter_times: List[float] = []
        #     for params in all_scatter_params:
        #         if SCATTER.supported_scatter(*params, a.dim(1), a.dtype):
        #             this_times = []
        #             for j in range(10):
        #                 GemmMainUnitTest.stream_synchronize(stream)
        #                 t = time.time()
        #                 SCATTER.scatter(c_, scatter_data_, c_inds, *params)
        #                 GemmMainUnitTest.stream_synchronize(stream)
        #                 this_times.append(time.time() - t)
        #             scatter_times.append(np.mean(this_times[5:]))
        #     min_time = 1000
        #     min_idx = -1
        #     for i, t in enumerate(scatter_times):
        #         if t < min_time:
        #             min_time = t
        #             min_idx = i
        #     best_scatter_params = all_scatter_params[min_idx]
        all_profile_res: List[BestAlgoByProfile] = []
        for desp in avail:
            c_.zero_()
            params = GemmParams()
            params.a = a
            params.b = b
            params.c = c_
            params.a_inds = a_inds
            params.b_inds = b_inds
            params.c_inds = c_inds
            params.algo_desp = desp
            params.alpha = alpha
            params.beta = beta
            params.stream = stream
            # TODO better splitk selection
            if desp.split_k_serial and hint & AlgoHint.BackwardWeight.value:
                splitk_tests = [1, 2, 4, 8, 16, 32, 64]
            else:
                splitk_tests = [1]
            for spk in splitk_tests:
                params.split_k_slices = spk
                this_times = []
                for _ in range(3):
                    GemmMainUnitTest.stream_synchronize(stream)
                    # perf_counter is monotonic and high resolution, which
                    # matters for launches that take well under a millisecond.
                    t = time.perf_counter()
                    GemmMainUnitTest.matmul2(params)
                    GemmMainUnitTest.stream_synchronize(stream)
                    this_times.append(time.perf_counter() - t)
                # Skip the first launch (warm-up) when averaging.
                times.append(np.mean(this_times[1:]))
                all_profile_res.append(
                    BestAlgoByProfile(desp,
                                      False,
                                      False,
                                      best_gather_params,
                                      best_scatter_params,
                                      splitk=spk))
            # if desp.split_k_serial:
            #     print(a.shape, b.shape, spk_speeds)
            # if not gather_data.empty(
            # ) and not hint & AlgoHint.BackwardWeight.value:
            #     # run gather here
            #     for spk in splitk_tests:
            #         this_times = []
            #         for j in range(3):
            #             GemmMainUnitTest.stream_synchronize(stream)
            #             t = time.time()
            #             params.a_inds = tv.Tensor()
            #             params.a = gather_data_
            #             params.split_k_slices = spk
            #             GATHER.gather(gather_data_,
            #                           a,
            #                           a_inds,
            #                           *best_gather_params,
            #                           stream=stream)
            #             GemmMainUnitTest.matmul2(params)
            #             GemmMainUnitTest.stream_synchronize(stream)
            #             this_times.append(time.time() - t)
            #         times.append(np.mean(this_times[1:]))
            #         # print("G", times[-1], times[-2])
            #         all_profile_res.append(
            #             BestAlgoByProfile(desp,
            #                               True,
            #                               False,
            #                               best_gather_params, best_scatter_params,
            #                               splitk=spk))
        if not times:
            # IndexError is what the previous implementation raised here (by
            # indexing an empty list); keep the type, add a usable message.
            raise IndexError(
                "no GEMM algorithm available to profile for the given operands")
        # argmin returns the first minimum, so ties resolve to the earliest
        # profiled entry.
        min_idx = int(np.argmin(times))
        min_time = times[min_idx]
        res = all_profile_res[min_idx]
        cache[key] = res
        return res, min_time

    def run_profile(
            self,
            profile_res: BestAlgoByProfile,
            a: tv.Tensor,
            b: tv.Tensor,
            c: tv.Tensor,
            trans_a: bool,
            trans_b: bool,
            trans_c: bool,
            arch: Tuple[int, int],
            stream: int,
            shuffle_type: ShuffleStrideType = ShuffleStrideType.NoShuffle,
            a_inds: tv.Tensor = tv.Tensor(),
            b_inds: tv.Tensor = tv.Tensor(),
            c_inds: tv.Tensor = tv.Tensor(),
            hint: int = AlgoHint.NoHint.value,
            alpha: float = 1.0,
            beta: float = 0.0,
            gather_data: tv.Tensor = tv.Tensor(),
            workspace: tv.Tensor = tv.Tensor()):
        """Launch the kernel recorded in ``profile_res`` on the operands.

        ``arch``, ``hint`` and ``gather_data`` are accepted but not used by
        the active code path. No stream synchronization is performed here.

        Returns:
            The ``GemmAlgoDesp`` that was launched.
        """
        # The result is unused; the call is kept because whether
        # extract_mnk also validates the shapes cannot be seen from here.
        # NOTE(review): drop it if extract_mnk has no side effects.
        m, n, k = GemmMainUnitTest.extract_mnk(a.shape, b.shape,
                                               trans_a, trans_b, trans_c,
                                               shuffle_type.value,
                                               a_inds.shape, b_inds.shape,
                                               c_inds.shape)
        # GemmMainUnitTest.stream_synchronize(stream)
        algo_desp = profile_res.algo_desp
        assert algo_desp is not None
        # TODO better splitk selection
        # if algo_desp.split_k_serial and hint & AlgoHint.BackwardWeight.value:
        #     split_k_slices = max(min(32, k // 128), 1)
        # Use the profiled split-k factor, but never fewer than one slice.
        split_k_slices = profile_res.splitk if profile_res.splitk > 1 else 1
        params = GemmParams()
        params.a = a
        params.b = b
        params.c = c
        params.a_inds = a_inds
        params.b_inds = b_inds
        params.c_inds = c_inds
        params.algo_desp = algo_desp
        params.split_k_slices = split_k_slices
        params.stream = stream
        params.alpha = alpha
        params.beta = beta
        params.workspace = workspace
        # gather = 0
        # if profile_res.external_gather and not gather_data.empty():
        #     GemmMainUnitTest.stream_synchronize(stream)
        #     tt = time.time()
        #     assert not gather_data.empty()
        #     params.a_inds = tv.Tensor()
        #     params.a = gather_data
        #     # print(profile_res.gather_params, gather_data.shape, a.shape, a_inds.shape)
        #     GATHER.gather(gather_data,
        #                   a,
        #                   a_inds,
        #                   *profile_res.gather_params,
        #                   stream=stream)
        #     GemmMainUnitTest.stream_synchronize(stream)
        #     gather = time.time() - tt
        GemmMainUnitTest.matmul2(params)
        # GemmMainUnitTest.stream_synchronize(stream)
        return algo_desp
# Module-level selector built from every known descriptor.
GEMM = SimpleGemm(ALL_ALGO_DESPS)
if __name__ == "__main__":
    # Manual smoke test: report how many descriptors exist, then measure the
    # average latency of the heuristic ``select`` call over 100 runs.
    print(len(ALL_ALGO_DESPS))
    print(ALL_ALGO_DESPS[0])
    a = tv.zeros([64000, 32], dtype=tv.float16)
    b = tv.zeros([32, 64], dtype=tv.float16)
    c = tv.zeros([64000, 64], dtype=tv.float16)
    a_inds = tv.zeros([64000], dtype=tv.int32)
    c_inds = tv.zeros([64000], dtype=tv.int32)
    num_iters = 100
    t = time.time()
    for _ in range(num_iters):
        # ``c`` is passed as the second operand and ``b`` as the output.
        algo = GEMM.select(a,
                           c,
                           b,
                           True,
                           False,
                           False, (7, 5),
                           ShuffleStrideType.ShuffleAB,
                           a_inds=a_inds,
                           b_inds=c_inds)
    elapsed = time.time() - t
    print(elapsed / num_iters)
    print(algo)
# Copyright 2021 Yan Yan
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from pathlib import Path
import pccm
from pccm.utils import project_is_editable, project_is_installed
from .constants import PACKAGE_NAME, PACKAGE_ROOT
# Build the pybind extension only for an installed, editable package.
if project_is_installed(PACKAGE_NAME) and project_is_editable(PACKAGE_NAME):
    from cumm.gemm.main import (SHUFFLE_SIMT_PARAMS, SHUFFLE_TURING_PARAMS,
                                SHUFFLE_VOLTA_PARAMS, GemmMainUnitTest)
    from spconv.csrc.sparse.all import SpconvOps
    # from cumm.gemm.gather import GatherAll, ScatterAll
    # The parameter lists are concatenated in SIMT, Volta, Turing order.
    cu = GemmMainUnitTest(SHUFFLE_SIMT_PARAMS + SHUFFLE_VOLTA_PARAMS +
                          SHUFFLE_TURING_PARAMS)
    cu.namespace = "cumm.gemm.main"
    pccm.builder.build_pybind(
        [cu, SpconvOps()],
        PACKAGE_ROOT / "core_cc",
        namespace_root=PACKAGE_ROOT,
        load_library=False,
    )
# Copyright 2021 Yan Yan
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
from pathlib import Path
from typing import List
from pccm.utils import project_is_editable, project_is_installed
# Name passed to the pccm install checks below.
PACKAGE_NAME = "spconv"
# Resolved directory that contains this file.
PACKAGE_ROOT = Path(__file__).parent.resolve()
# True only when pccm reports the package as both installed and editable.
EDITABLE_INSTALLED = project_is_installed(PACKAGE_NAME) and project_is_editable(PACKAGE_NAME)
# FILTER_HWIO is True only when SPCONV_FILTER_HWIO is exactly "1"; an unset
# variable falls back to "0".
_filter_hwio_env = os.getenv("SPCONV_FILTER_HWIO", "0")
FILTER_HWIO = _filter_hwio_env == "1"
\ No newline at end of file
# Copyright 2021 Yan Yan
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Copyright 2021 Yan Yan
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Copyright 2021 Yan Yan
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from typing import overload, Any, Callable, Dict, List, Optional, Set, Tuple, Type, Union
from pccm.stubs import EnumValue, EnumClassValue
from cumm.tensorview import Tensor
class SpconvOps:
    """Type stub for ``SpconvOps``: static methods only, bodies elided.

    NOTE(review): the behaviour of each method is defined outside this stub;
    the docstrings below only list the declared parameters.
    """
    @staticmethod
    def generate_conv_inds(
        indices: Tensor,
        hashdata: Tensor,
        indice_pairs: Tensor,
        indice_pairs_uniq: Tensor,
        out_inds: Tensor,
        indice_num_per_loc: Tensor,
        batch_size: int,
        output_dims: List[int],
        input_dims: List[int],
        ksize: List[int],
        stride: List[int],
        padding: List[int],
        dilation: List[int],
    ) -> int:
        """
        Args:
            indices:
            hashdata:
            indice_pairs:
            indice_pairs_uniq:
            out_inds:
            indice_num_per_loc:
            batch_size:
            output_dims:
            input_dims:
            ksize:
            stride:
            padding:
            dilation:
        """
        ...

    @staticmethod
    def generate_conv_inds_stage1(
        indices: Tensor,
        indice_pairs: Tensor,
        indice_pairs_uniq: Tensor,
        indice_num_per_loc: Tensor,
        batch_size: int,
        output_dims: List[int],
        input_dims: List[int],
        ksize: List[int],
        stride: List[int],
        padding: List[int],
        dilation: List[int],
        stream_int: int = 0,
    ) -> int:
        """
        Args:
            indices:
            indice_pairs:
            indice_pairs_uniq:
            indice_num_per_loc:
            batch_size:
            output_dims:
            input_dims:
            ksize:
            stride:
            padding:
            dilation:
            stream_int:
        """
        ...

    @staticmethod
    def generate_conv_inds_stage2(
        indices: Tensor,
        hashdata: Tensor,
        indice_pairs: Tensor,
        indice_pairs_uniq: Tensor,
        out_inds: Tensor,
        num_out_act: int,
        batch_size: int,
        output_dims: List[int],
        input_dims: List[int],
        ksize: List[int],
        stride: List[int],
        padding: List[int],
        dilation: List[int],
        stream_int: int = 0,
    ) -> int:
        """
        Args:
            indices:
            hashdata:
            indice_pairs:
            indice_pairs_uniq:
            out_inds:
            num_out_act:
            batch_size:
            output_dims:
            input_dims:
            ksize:
            stride:
            padding:
            dilation:
            stream_int:
        """
        ...

    @staticmethod
    def generate_subm_conv_inds(
        indices: Tensor,
        hashdata: Tensor,
        indice_pairs: Tensor,
        out_inds: Tensor,
        indice_num_per_loc: Tensor,
        batch_size: int,
        input_dims: List[int],
        ksize: List[int],
        dilation: List[int],
        indice_pair_mask: Tensor = Tensor(),
        backward: bool = False,
        stream_int: int = 0,
    ) -> int:
        """
        Args:
            indices:
            hashdata:
            indice_pairs:
            out_inds:
            indice_num_per_loc:
            batch_size:
            input_dims:
            ksize:
            dilation:
            indice_pair_mask:
            backward:
            stream_int:
        """
        ...

    @staticmethod
    def maxpool_forward(
        out: Tensor,
        inp: Tensor,
        out_inds: Tensor,
        in_inds: Tensor,
        stream: int = 0,
    ) -> None:
        """
        Args:
            out:
            inp:
            out_inds:
            in_inds:
            stream:
        """
        ...

    @staticmethod
    def maxpool_backward(
        out: Tensor,
        inp: Tensor,
        dout: Tensor,
        dinp: Tensor,
        out_inds: Tensor,
        in_inds: Tensor,
        stream: int = 0,
    ) -> None:
        """
        Args:
            out:
            inp:
            dout:
            dinp:
            out_inds:
            in_inds:
            stream:
        """
        ...

    @staticmethod
    def sort_1d_by_key(data: Tensor) -> Tensor:
        """
        Args:
            data:
        """
        ...
from typing import overload, Any, Callable, Dict, List, Optional, Set, Tuple, Type, Union
from pccm.stubs import EnumValue, EnumClassValue
from cumm.tensorview import Tensor
class Point2Voxel:
    """Type stub for ``Point2Voxel``; method bodies are elided.

    The five ``Tensor`` attributes and the read-only ``grid_size`` property
    are declarations only.
    """
    hashdata: Tensor
    point_indice_data: Tensor
    voxels: Tensor
    indices: Tensor
    num_per_voxel: Tensor

    @property
    def grid_size(self) -> List[int]:
        ...

    def __init__(
        self,
        vsize_xyz: List[float],
        coors_range_xyz: List[float],
        num_point_features: int,
        max_num_voxels: int,
        max_num_points_per_voxel: int,
    ) -> None:
        """
        Args:
            vsize_xyz:
            coors_range_xyz:
            num_point_features:
            max_num_voxels:
            max_num_points_per_voxel:
        """
        ...

    def point_to_voxel_hash(
        self,
        points: Tensor,
        clear_voxels: bool = True,
    ) -> Tuple[Tensor, Tensor, Tensor]:
        """
        Args:
            points:
            clear_voxels:
        """
        ...
from typing import overload, Any, Callable, Dict, List, Optional, Set, Tuple, Type, Union
from pccm.stubs import EnumValue, EnumClassValue
from cumm.tensorview import Tensor
class Point2Voxel:
    """Type stub for ``Point2Voxel``; method bodies are elided.

    The five ``Tensor`` attributes and the read-only ``grid_size`` property
    are declarations only.
    """
    hashdata: Tensor
    point_indice_data: Tensor
    voxels: Tensor
    indices: Tensor
    num_per_voxel: Tensor

    @property
    def grid_size(self) -> List[int]:
        ...

    def __init__(
        self,
        vsize_xyz: List[float],
        coors_range_xyz: List[float],
        num_point_features: int,
        max_num_voxels: int,
        max_num_points_per_voxel: int,
    ) -> None:
        """
        Args:
            vsize_xyz:
            coors_range_xyz:
            num_point_features:
            max_num_voxels:
            max_num_points_per_voxel:
        """
        ...

    def point_to_voxel_hash(
        self,
        points: Tensor,
        clear_voxels: bool = True,
    ) -> Tuple[Tensor, Tensor, Tensor]:
        """
        Args:
            points:
            clear_voxels:
        """
        ...
from typing import overload, Any, Callable, Dict, List, Optional, Set, Tuple, Type, Union
from pccm.stubs import EnumValue, EnumClassValue
from cumm.tensorview import Tensor
class Point2Voxel:
    """Type stub for ``Point2Voxel``; method bodies are elided.

    The five ``Tensor`` attributes and the read-only ``grid_size`` property
    are declarations only.
    """
    hashdata: Tensor
    point_indice_data: Tensor
    voxels: Tensor
    indices: Tensor
    num_per_voxel: Tensor

    @property
    def grid_size(self) -> List[int]:
        ...

    def __init__(
        self,
        vsize_xyz: List[float],
        coors_range_xyz: List[float],
        num_point_features: int,
        max_num_voxels: int,
        max_num_points_per_voxel: int,
    ) -> None:
        """
        Args:
            vsize_xyz:
            coors_range_xyz:
            num_point_features:
            max_num_voxels:
            max_num_points_per_voxel:
        """
        ...

    def point_to_voxel_hash(
        self,
        points: Tensor,
        clear_voxels: bool = True,
    ) -> Tuple[Tensor, Tensor, Tensor]:
        """
        Args:
            points:
            clear_voxels:
        """
        ...
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment