// Copyright (c) OpenMMLab. All rights reserved. #pragma once #include "src/turbomind/utils/allocator.h" #include "src/turbomind/utils/cuda_utils.h" #include "src/turbomind/utils/logger.h" #include #include #include #include #include #include #include #include namespace turbomind { // [L, H, S, D] // [L, S/x, H, x, D] struct Block { int id; // fixed linear id in the pool int ref_count; // all sequences referencing the block int use_count; // active sequences using the block uint64_t unique_id; // unique for every block allocation uint64_t timestamp; void* data; friend std::ostream& operator<<(std::ostream& os, const Block& block); }; inline bool is_active(const Block& block) { return block.ref_count > 0 && block.use_count > 0; } inline bool is_cached(const Block& block) { return block.ref_count > 0 && block.use_count == 0; } inline bool is_free(const Block& block) { return block.ref_count == 0 && block.use_count == 0 && block.timestamp == 0; } struct Snapshot { int active; int cached; int free; std::vector use_count; }; class BlockManager { public: explicit BlockManager(size_t block_size, double block_count, int chunk_size, IAllocator* allocator); ~BlockManager(); // free -> active (use_count = 1, ref_count = 1) [[nodiscard]] std::vector Allocate(int count); // cached -> active (use_count += 1) [[maybe_unused]] int Lock(const std::vector& bs); // active -> cached (use_count -= 1) [[maybe_unused]] int Unlock(const std::vector& bs); // cached -> free (ref_count = 0) void Evict(int count); // cached -> free (ref_count -= 1) [[maybe_unused]] int Free(const std::vector& bs); // increase timestamp in reversed order void Touch(const std::vector& bs); Snapshot TakeSnapshot(); int max_block_count() const noexcept { return max_block_count_; } int active_count() const noexcept { return active_ids_.size(); } int cached_count() const noexcept { return cached_ids_.size(); } int free_count() const noexcept { return (max_block_count_ - blocks_.size()) + free_ids_.size(); } friend std::ostream& operator<<(std::ostream& os, const BlockManager&); private: static size_t GetBlockCount(size_t block_size, double ratio); // move indices between sets static void Move(std::vector& src, const std::vector& delta, std::vector& dst); // allocate a chunk of blocks bool Malloc(); private: size_t block_size_; int max_block_count_{}; int chunk_size_{}; IAllocator* allocator_; std::vector chunks_; std::vector active_ids_; std::vector cached_ids_; std::vector free_ids_; std::vector blocks_; // < 100k // uint64_t unique_id_{1UL << 63}; uint64_t unique_id_{1}; uint64_t timestamp_{1}; }; } // namespace turbomind