"src/git@developer.sourcefind.cn:renzhc/diffusers_dcu.git" did not exist on "73bb97adfc3d0cb184c5fd66a1d5699c249a7fd8"
Unverified Commit f0b7cc96 authored by peizhou001's avatar peizhou001 Committed by GitHub
Browse files

[Performance]Add concurrent cpu id hashmap (#5241)

Add Id hash map
parent 5598503a
/**
* Copyright (c) 2023 by Contributors
* @file array/cpu/id_hash_map.cc
* @brief Class about id hash map
*/
#include "id_hash_map.h"
#ifdef _MSC_VER
#include <intrin.h>
#endif // _MSC_VER
#include <dgl/array.h>
#include <dgl/runtime/device_api.h>
#include <dgl/runtime/parallel_for.h>
#include <cmath>
#include <numeric>
using namespace dgl::runtime;
namespace {
static constexpr int64_t kEmptyKey = -1;
static constexpr int kGrainSize = 256;
// The formula is established from experience which is used
// to get the hashmap size from the input array size.
inline size_t GetMapSize(size_t num) {
size_t capacity = 1;
return capacity << static_cast<size_t>(1 + std::log2(num * 3));
}
} // namespace
namespace dgl {
namespace aten {
template <typename IdType>
IdType IdHashMap<IdType>::CompareAndSwap(
IdType* ptr, IdType old_val, IdType new_val) {
#ifdef _MSC_VER
if (sizeof(IdType) == 4) {
return _InterlockedCompareExchange(
reinterpret_cast<LONG*>(ptr), new_val, old_val);
} else if (sizeof(IdType) == 8) {
return _InterlockedCompareExchange64(
reinterpret_cast<LONGLONG*>(ptr), new_val, old_val);
} else {
LOG(FATAL) << "ID can only be int32 or int64";
}
#elif __GNUC__ // _MSC_VER
return __sync_val_compare_and_swap(ptr, old_val, new_val);
#else // _MSC_VER
#error "CompareAndSwap is not supported on this platform."
#endif // _MSC_VER
}
template <typename IdType>
IdHashMap<IdType>::IdHashMap() : mask_(0) {
// Used to deallocate the memory in hash_map_ with device api
// when the pointer is freed.
auto deleter = [](Mapping* mappings) {
if (mappings != nullptr) {
DGLContext ctx = DGLContext{kDGLCPU, 0};
auto device = DeviceAPI::Get(ctx);
device->FreeWorkspace(ctx, mappings);
}
};
hash_map_ = {nullptr, deleter};
}
template <typename IdType>
IdArray IdHashMap<IdType>::Init(const IdArray& ids) {
CHECK_EQ(ids.defined(), true);
const IdType* ids_data = ids.Ptr<IdType>();
const size_t num_ids = static_cast<size_t>(ids->shape[0]);
// Make sure `ids` is not 0 dim.
CHECK_GT(num_ids, 0);
size_t capacity = GetMapSize(num_ids);
mask_ = static_cast<IdType>(capacity - 1);
auto ctx = DGLContext{kDGLCPU, 0};
hash_map_.reset(static_cast<Mapping*>(
DeviceAPI::Get(ctx)->AllocWorkspace(ctx, sizeof(Mapping) * capacity)));
memset(hash_map_.get(), -1, sizeof(Mapping) * capacity);
// This code block is to fill the ids into hash_map_.
IdArray unique_ids = NewIdArray(num_ids, ctx, sizeof(IdType) * 8);
// An auxiliary array indicates whether the corresponding elements
// are inserted into hash map or not. Use `int16_t` instead of `bool` as
// vector<bool> is unsafe when updating different elements from different
// threads. See https://en.cppreference.com/w/cpp/container#Thread_safety.
std::vector<int16_t> valid(num_ids);
auto thread_num = compute_num_threads(0, num_ids, kGrainSize);
std::vector<size_t> block_offset(thread_num + 1, 0);
IdType* unique_ids_data = unique_ids.Ptr<IdType>();
// Insert all elements in this loop.
parallel_for(0, num_ids, kGrainSize, [&](int64_t s, int64_t e) {
size_t count = 0;
for (int64_t i = s; i < e; i++) {
Insert(ids_data[i], &valid, i);
count += valid[i];
}
block_offset[omp_get_thread_num() + 1] = count;
});
// Get ExclusiveSum of each block.
std::partial_sum(
block_offset.begin() + 1, block_offset.end(), block_offset.begin() + 1);
unique_ids->shape[0] = block_offset.back();
// Get unique array from ids and set value for hash map.
parallel_for(0, num_ids, kGrainSize, [&](int64_t s, int64_t e) {
auto tid = omp_get_thread_num();
auto pos = block_offset[tid];
for (int64_t i = s; i < e; i++) {
if (valid[i]) {
unique_ids_data[pos] = ids_data[i];
Set(ids_data[i], pos);
pos = pos + 1;
}
}
});
return unique_ids;
}
template <typename IdType>
IdArray IdHashMap<IdType>::MapIds(const IdArray& ids) const {
CHECK_EQ(ids.defined(), true);
const IdType* ids_data = ids.Ptr<IdType>();
const size_t num_ids = static_cast<size_t>(ids->shape[0]);
CHECK_GT(num_ids, 0);
DGLContext ctx = DGLContext{kDGLCPU, 0};
IdArray new_ids = NewIdArray(num_ids, ctx, sizeof(IdType) * 8);
IdType* values_data = new_ids.Ptr<IdType>();
parallel_for(0, num_ids, kGrainSize, [&](int64_t s, int64_t e) {
for (int64_t i = s; i < e; i++) {
values_data[i] = MapId(ids_data[i]);
}
});
return new_ids;
}
template <typename IdType>
inline void IdHashMap<IdType>::Next(IdType* pos, IdType* delta) const {
// Use Quadric probing.
*pos = (*pos + (*delta) * (*delta)) & mask_;
*delta = *delta + 1;
}
template <typename IdType>
IdType IdHashMap<IdType>::MapId(IdType id) const {
IdType pos = (id & mask_), delta = 1;
IdType empty_key = static_cast<IdType>(kEmptyKey);
while (hash_map_[pos].key != empty_key && hash_map_[pos].key != id) {
Next(&pos, &delta);
}
return hash_map_[pos].value;
}
template <typename IdType>
void IdHashMap<IdType>::Insert(
IdType id, std::vector<int16_t>* valid, size_t index) {
IdType pos = (id & mask_), delta = 1;
while (!AttemptInsertAt(pos, id, valid, index)) {
Next(&pos, &delta);
}
}
template <typename IdType>
void IdHashMap<IdType>::Set(IdType key, IdType value) {
IdType pos = (key & mask_), delta = 1;
while (hash_map_[pos].key != key) {
Next(&pos, &delta);
}
hash_map_[pos].value = value;
}
template <typename IdType>
bool IdHashMap<IdType>::AttemptInsertAt(
int64_t pos, IdType key, std::vector<int16_t>* valid, size_t index) {
IdType empty_key = static_cast<IdType>(kEmptyKey);
IdType old_val = CompareAndSwap(&(hash_map_[pos].key), empty_key, key);
if (old_val != empty_key && old_val != key) {
return false;
} else {
if (old_val == empty_key) (*valid)[index] = true;
return true;
}
}
template class IdHashMap<int32_t>;
template class IdHashMap<int64_t>;
} // namespace aten
} // namespace dgl
/**
* Copyright (c) 2023 by Contributors
* @file array/cpu/id_hash_map.h
* @brief Class about id hash map
*/
#ifndef DGL_ARRAY_CPU_ID_HASH_MAP_H_
#define DGL_ARRAY_CPU_ID_HASH_MAP_H_
#include <dgl/aten/types.h>
#include <functional>
#include <memory>
#include <vector>
namespace dgl {
namespace aten {
/**
* @brief A CPU targeted hashmap for mapping duplicate and non-consecutive ids
* in the provided array to unique and consecutive ones. It utilizes
* multi-threading to accelerate the insert and search speed. Currently it is
* only designed to be used in `ToBlockCpu` for optimizing, so it only support
* key insertions once with Init function, and it does not support key deletion.
*
* The hash map should be prepared in two phases before using. With the first
* being creating the hashmap, and then init it with an id array.
*
* For example, for an array A with following entries:
* [98, 98, 100, 99, 97, 99, 101, 100, 102]
* Create the hashmap H with:
* `H = CpuIdHashMap()` (1)
* And Init it with:
* `U = H.Init(A)` (2) (U is an id array used to store the unqiue
* ids in A).
* Then U should be (U is not exclusive as the element order is not
* guaranteed to be steady):
* [98, 100, 99, 97, 101, 102]
* And the hashmap should generate following mappings:
* * [
* {key: 98, value: 0},
* {key: 100, value: 1},
* {key: 99, value: 2},
* {key: 97, value: 3},
* {key: 101, value: 4},
* {key: 102, value: 5}
* ]
* Search the hashmap with array I=[98, 99, 102]:
* R = H.Map(I) (3)
* R should be:
* [0, 2, 5]
**/
template <typename IdType>
class IdHashMap {
public:
/**
* @brief An entry in the hashtable.
*/
struct Mapping {
/**
* @brief The ID of the item inserted.
*/
IdType key;
/**
* @brief The value of the item inserted.
*/
IdType value;
};
/**
* @brief Cross platform CAS operation.
* It is an atomic operation that compares the contents of a memory
* location with a given value and, only if they are the same, modifies
* the contents of that memory location to a new given value.
*
* @param ptr The pointer to the object to test and modify .
* @param old_val The value expected to be found in `ptr`.
* @param new_val The value to store in `ptr` if it is as expected.
*
* @return Old value pointed by the `ptr`.
*/
static IdType CompareAndSwap(IdType* ptr, IdType old_val, IdType new_val);
IdHashMap();
IdHashMap(const IdHashMap& other) = delete;
IdHashMap& operator=(const IdHashMap& other) = delete;
/**
* @brief Init the hashmap with an array of ids.
* Firstly allocating the memeory and init the entire space with empty key.
* And then insert the items in `ids` concurrently to generate the
* mappings, in passing returning the unique ids in `ids`.
*
* @param ids The array of ids to be inserted as keys.
*
* @return Unique ids for the input `ids`.
*/
IdArray Init(const IdArray& ids);
/**
* @brief Find the mappings of given keys.
*
* @param ids The keys to map for.
*
* @return Mapping results corresponding to `ids`.
*/
IdArray MapIds(const IdArray& ids) const;
private:
/**
* @brief Get the next position and delta for probing.
*
* @param[in,out] pos Calculate the next position with quadric probing.
* @param[in,out] delta Calculate the next delta by adding 1.
*/
void Next(IdType* pos, IdType* delta) const;
/**
* @brief Find the mapping of a given key.
*
* @param id The key to map for.
*
* @return Mapping result for the `id`.
*/
IdType MapId(const IdType id) const;
/**
* @brief Insert an id into the hash map.
*
* @param id The id to be inserted.
* @param valid The item at index will be set to indicate
* whether the `id` at `index` is inserted or not.
* @param index The index of the `id`.
*
*/
void Insert(IdType id, std::vector<int16_t>* valid, size_t index);
/**
* @brief Set the value for the key in the hash map.
*
* @param key The key to set for.
* @param value The value to be set for the `key`.
*
* @warning Key must exist.
*/
void Set(IdType key, IdType value);
/**
* @brief Attempt to insert the key into the hash map at the given position.
* 1. If the key at `pos` is empty -> Set the key, return true and set
* `valid[index]` to true.
* 2. If the key at `pos` is equal to `key` -> Return true.
* 3. If the key at `pos` is non-empty and not equal to `key` -> Return false.
* @param pos The position in the hash map to be inserted at.
* @param key The key to be inserted.
* @param valid The item at index will be set to indicate
* whether the `key` at `index` is inserted or not.
* @param index The index of the `key`.
*
* @return Whether the key exists in the map now.
*/
bool AttemptInsertAt(
int64_t pos, IdType key, std::vector<int16_t>* valid, size_t index);
private:
/**
* @brief Hash maps which is used to store all elements.
*/
std::unique_ptr<Mapping[], std::function<void(Mapping*)>> hash_map_;
/**
* @brief Mask which is assisted to get the position in the table
* for a key by performing `&` operation with it.
*/
IdType mask_;
};
} // namespace aten
} // namespace dgl
#endif // DGL_ARRAY_CPU_ID_HASH_MAP_H_
#include <dgl/array.h>
#include <dgl/runtime/parallel_for.h>
#include <gtest/gtest.h>
#include <set>
#include "../../src/array/cpu/id_hash_map.h"
#include "./common.h"
using namespace dgl;
using namespace dgl::runtime;
using namespace dgl::aten;
namespace {
template <typename IdType>
void ConstructRandomSet(
size_t size, IdType range, std::vector<IdType>& id_vec) {
id_vec.resize(size);
std::srand(std::time(nullptr));
for (size_t i = 0; i < size; i++) {
id_vec[i] = static_cast<IdType>(std::rand() % range);
}
}
template <typename IdType, size_t size, IdType range>
void _TestIdMap() {
std::vector<IdType> id_vec;
ConstructRandomSet(size, range, id_vec);
std::set<IdType> id_set(id_vec.begin(), id_vec.end());
IdArray ids = VecToIdArray(id_vec, sizeof(IdType) * 8, CTX);
IdHashMap<IdType> id_map;
IdArray unique_ids = id_map.Init(ids);
auto unique_num = static_cast<size_t>(unique_ids->shape[0]);
IdType* unique_id_data = unique_ids.Ptr<IdType>();
EXPECT_EQ(id_set.size(), unique_num);
parallel_for(0, unique_num, 128, [&](int64_t s, int64_t e) {
for (int64_t i = s; i < e; i++) {
EXPECT_TRUE(id_set.find(unique_id_data[i]) != id_set.end());
}
});
IdArray new_ids = id_map.MapIds(unique_ids);
EXPECT_TRUE(new_ids.IsContiguous());
}
TEST(IdHashMapTest, TestIdHashMap) {
_TestIdMap<int32_t, 1, 10>();
_TestIdMap<int64_t, 1, 10>();
_TestIdMap<int32_t, 1000, 500000>();
_TestIdMap<int64_t, 1000, 500000>();
_TestIdMap<int32_t, 50000, 1000000>();
_TestIdMap<int64_t, 50000, 1000000>();
_TestIdMap<int32_t, 100000, 40000000>();
_TestIdMap<int64_t, 100000, 40000000>();
}
}; // namespace
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment