Unverified Commit 62fe6999 authored by PanZezhong1725, committed by GitHub

Merge pull request #828 from InfiniTensor/issue/809

issue/809 Support a pinnable, caching memory allocator
parents 0ead67fc f4966bab
#include "pinnable_block_allocator.hpp"
#include "../../utils.hpp"
#include <algorithm>
#include <infinirt.h>
#include <stdexcept>
namespace infinicore {
// ------------------- Helper functions -------------------
// Round up size to nearest multiple of alignment
inline size_t align_up(size_t size, size_t alignment) {
    return (size + alignment - 1) / alignment * alignment;
}
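// Worked example (explanatory note, not part of the original source):
//   align_up(300, 256)        == 512
//   align_up(256 * 1024, 256) == 256 * 1024   (already aligned)
// allocate() below rounds every request up to a 256-byte boundary this way
// before choosing a size class.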
// ------------------- Constructor -------------------
PinnableBlockAllocator::PinnableBlockAllocator(Device device)
    : device_(device) {
    size_classes_ = {
        {256 * 1024, {}},        // 256 KB
        {1 * 1024 * 1024, {}},   // 1 MB
        {4 * 1024 * 1024, {}},   // 4 MB
        {16 * 1024 * 1024, {}},  // 16 MB
        {64 * 1024 * 1024, {}},  // 64 MB
        {256 * 1024 * 1024, {}}, // 256 MB
    };
}
// ------------------- allocate -------------------
std::byte *PinnableBlockAllocator::allocate(size_t size) {
    std::lock_guard<std::mutex> lock(mutex_);
    // Align size to 256 bytes for GPU
    size = align_up(size, 256);
    std::shared_ptr<Block> block;
    // 1. Try size-class allocation for small/medium requests
    for (auto &cls : size_classes_) {
        if (size <= cls.block_size) {
            if (!cls.free_blocks.empty()) {
                // Reuse a cached block of this class
                block = cls.free_blocks.back();
                cls.free_blocks.pop_back();
                block->in_use = true;
                // Mark the block frozen when reused in pinned mode, matching the large-block path
                block->frozen = block->frozen || pinned_mode_;
                return reinterpret_cast<std::byte *>(block->ptr);
            }
            // Allocate a new block for this class
            block = std::make_shared<Block>();
            block->size = cls.block_size;
            block->frozen = pinned_mode_;
            block->in_use = true;
            INFINICORE_CHECK_ERROR(infinirtMalloc(&block->ptr, block->size));
            all_blocks_[block->ptr] = block;
            return reinterpret_cast<std::byte *>(block->ptr);
        }
    }
    // 2. Large block allocation
    // Try to reuse a frozen or free large block
    auto it = std::find_if(large_blocks_.begin(), large_blocks_.end(),
                           [size](const std::shared_ptr<Block> &b) { return b->size >= size && !b->in_use; });
    if (it != large_blocks_.end()) {
        block = *it;
        block->in_use = true;
        block->frozen = block->frozen || pinned_mode_;
        return reinterpret_cast<std::byte *>(block->ptr);
    }
    // Allocate a new large block
    block = std::make_shared<Block>();
    block->size = size;
    block->frozen = pinned_mode_;
    block->in_use = true;
    INFINICORE_CHECK_ERROR(infinirtMalloc(&block->ptr, block->size));
    large_blocks_.push_back(block);
    all_blocks_[block->ptr] = block;
    return reinterpret_cast<std::byte *>(block->ptr);
}
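// (Explanatory note, not part of the original source.) Allocation policy in
// summary: requests up to 256 MB are served from the smallest size class that
// fits, allocating a full class-sized block on a cache miss; anything larger
// becomes a dedicated large block, reused first-fit from large_blocks_ when a
// free block of sufficient size exists.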
// ------------------- deallocate -------------------
void PinnableBlockAllocator::deallocate(std::byte *ptr) {
    if (!ptr) {
        return;
    }
    std::lock_guard<std::mutex> lock(mutex_);
    auto it = all_blocks_.find(reinterpret_cast<void *>(ptr));
    if (it == all_blocks_.end()) {
        throw std::runtime_error("Pointer not allocated by this allocator");
    }
    auto block = it->second;
    if (!block->in_use) {
        throw std::runtime_error("Double free detected in PinnableBlockAllocator");
    }
    block->in_use = false;
    // Return size-class blocks to their class's free list; large blocks simply
    // stay in large_blocks_ and are reused by allocate()
    for (auto &cls : size_classes_) {
        if (block->size == cls.block_size) {
            cls.free_blocks.push_back(block);
            break;
        }
    }
}
// ------------------- trim -------------------
void PinnableBlockAllocator::trim() {
    std::lock_guard<std::mutex> lock(mutex_);
    // Free non-frozen size-class blocks
    for (auto &cls : size_classes_) {
        for (auto it = cls.free_blocks.begin(); it != cls.free_blocks.end();) {
            if (!(*it)->frozen) {
                INFINICORE_CHECK_ERROR(infinirtFree((*it)->ptr));
                all_blocks_.erase((*it)->ptr);
                it = cls.free_blocks.erase(it);
            } else {
                ++it;
            }
        }
    }
    // Free non-frozen large blocks
    for (auto it = large_blocks_.begin(); it != large_blocks_.end();) {
        if (!(*it)->frozen && !(*it)->in_use) {
            INFINICORE_CHECK_ERROR(infinirtFree((*it)->ptr));
            all_blocks_.erase((*it)->ptr);
            it = large_blocks_.erase(it);
        } else {
            ++it;
        }
    }
}
// ------------------- Destructor -------------------
PinnableBlockAllocator::~PinnableBlockAllocator() {
    std::lock_guard<std::mutex> lock(mutex_);
    for (auto &p : all_blocks_) {
        if (p.second->ptr) {
            infinirtFree(p.second->ptr);
        }
    }
    all_blocks_.clear();
    large_blocks_.clear();
    for (auto &cls : size_classes_) {
        cls.free_blocks.clear();
    }
}
} // namespace infinicore
#pragma once
#include "memory_allocator.hpp"
#include "../context_impl.hpp"
#include <memory>
#include <mutex>
#include <unordered_map>
#include <vector>
namespace infinicore {
class PinnableBlockAllocator : public MemoryAllocator {
    // Represents a single memory block
    struct Block {
        void *ptr = nullptr; // Device pointer
        size_t size = 0;     // Block size in bytes
        bool frozen = false; // True if used in pinned/graph mode
        bool in_use = false; // Whether the block is currently in use
    };

    // A simple size-class allocator for small/medium blocks
    struct SizeClass {
        size_t block_size; // Fixed size for this class
        std::vector<std::shared_ptr<Block>> free_blocks;
    };
public:
    explicit PinnableBlockAllocator(Device device);
    ~PinnableBlockAllocator();

    std::byte *allocate(size_t size) override;
    void deallocate(std::byte *ptr) override;

    // Switch pinned/graph mode on or off
    void set_pin_mode(bool pinned) { pinned_mode_ = pinned; }

    // Release cached blocks that are not pinned back to the device
    void trim();
private:
    Device device_;
    bool pinned_mode_ = false;

    std::vector<SizeClass> size_classes_;
    std::vector<std::shared_ptr<Block>> large_blocks_;
    std::unordered_map<void *, std::shared_ptr<Block>> all_blocks_;

    std::mutex mutex_; // Thread safety
} // namespace infinicore
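// ------------------- Usage sketch (explanatory, not part of this commit) -------------------
// A minimal sketch of how the pinned/graph mode is meant to be driven. The
// Device argument and the "capture work" step are placeholders; only the
// PinnableBlockAllocator calls come from the code in this merge request.
#include "pinnable_block_allocator.hpp"

void pinned_mode_example(infinicore::Device device) {
    infinicore::PinnableBlockAllocator alloc(device);

    // Normal mode: the block goes back to its size-class cache on deallocate
    // and can later be released by trim().
    std::byte *tmp = alloc.allocate(1 << 20);
    alloc.deallocate(tmp);

    // Pinned/graph mode: blocks allocated now are marked frozen, so trim()
    // keeps them resident, e.g. while a captured graph still references them.
    alloc.set_pin_mode(true);
    std::byte *graph_buf = alloc.allocate(8u << 20);
    // ... capture/replay work using graph_buf ...
    alloc.deallocate(graph_buf); // returned to the cache, stays frozen
    alloc.set_pin_mode(false);

    // Frees cached, non-frozen blocks back to the device; frozen blocks stay.
    alloc.trim();
}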
#include "device_caching_allocator.hpp"
#include "stream_ordered_allocator.hpp"
#include <infinirt.h>
#include "../../utils.hpp"
namespace infinicore {
DeviceCachingAllocator::DeviceCachingAllocator(Device device) : MemoryAllocator(), device_(device) {}
StreamOrderedAllocator::StreamOrderedAllocator(Device device) : MemoryAllocator(), device_(device) {}
std::byte *DeviceCachingAllocator::allocate(size_t size) {
std::byte *StreamOrderedAllocator::allocate(size_t size) {
void *ptr = nullptr;
INFINICORE_CHECK_ERROR(infinirtMallocAsync(&ptr, size, context::getStream()));
return (std::byte *)ptr;
}
void DeviceCachingAllocator::deallocate(std::byte *ptr) {
void StreamOrderedAllocator::deallocate(std::byte *ptr) {
INFINICORE_CHECK_ERROR(infinirtFreeAsync(ptr, context::getStream()));
}
} // namespace infinicore
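// (Explanatory note, not part of this commit.) The renamed StreamOrderedAllocator
// above simply forwards to infinirtMallocAsync/infinirtFreeAsync on the current
// stream, so allocations and frees are ordered with the other work queued on that
// stream. A sketch of the calling pattern, with the kernel launch as a placeholder:
//
//     infinicore::StreamOrderedAllocator alloc(device);
//     std::byte *buf = alloc.allocate(n);
//     // ... enqueue kernels on context::getStream() that use buf ...
//     alloc.deallocate(buf); // freed in stream order, no extra sync needed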
@@ -5,10 +5,10 @@
 #include "../context_impl.hpp"
 namespace infinicore {
-class DeviceCachingAllocator : public MemoryAllocator {
+class StreamOrderedAllocator : public MemoryAllocator {
 public:
-    explicit DeviceCachingAllocator(Device device);
-    ~DeviceCachingAllocator() = default;
+    explicit StreamOrderedAllocator(Device device);
+    ~StreamOrderedAllocator() = default;
     std::byte *allocate(size_t size) override;
     void deallocate(std::byte *ptr) override;
......
@@ -2,9 +2,10 @@
 #include "../../utils.hpp"
-#include "../allocators/device_caching_allocator.hpp"
 #include "../allocators/device_pinned_allocator.hpp"
 #include "../allocators/host_allocator.hpp"
+#include "../allocators/pinnable_block_allocator.hpp"
+#include "../allocators/stream_ordered_allocator.hpp"
 namespace infinicore {
 Runtime::Runtime(Device device) : device_(device) {
@@ -14,7 +15,7 @@ Runtime::Runtime(Device device) : device_(device) {
     if (device_.getType() == Device::Type::CPU) {
         device_memory_allocator_ = std::make_unique<HostAllocator>();
     } else {
-        device_memory_allocator_ = std::make_unique<DeviceCachingAllocator>(device);
+        device_memory_allocator_ = std::make_unique<PinnableBlockAllocator>(device);
         pinned_host_memory_allocator_ = std::make_unique<DevicePinnedHostAllocator>(device);
     }
 }
......