#ifndef __GPU_CACHE_HH_
#define __GPU_CACHE_HH_

#include <cuda_runtime.h>

#include <cstddef>
#include <functional>
#include <memory>
#include <mutex>
#include <optional>
#include <vector>

#include "cache_entry.hh"
#include "cuda_stream_manager.hh"
#include "defs.h"
#include "kvc2.h"
#include "metrics.h"
#include "utils/periodic_task.hpp"

namespace kvc2 {

class GPUPageCache {
  std::vector gpu_devices;
  std::vector shape;
  size_t tensor_size;
  std::vector tp_offset;
  std::vector tp_size;

  // met
  std::shared_ptr met;

  // states
  std::mutex lock;
  size_t num_free_pages;
  std::vector gpu_only_occupations;
  std::vector>> occupations, v_occupations;
  size_t _col_idx = 0;

  // cuda stream manager
  std::optional next_empty_col();

 public:
  GPUPageCacheConfig config;
  std::unique_ptr stream_manager;
  std::vector k_cache;
  std::vector v_cache;
  std::unique_ptr background_flush_back = nullptr;

  GPUPageCache(GPUPageCacheConfig& config);

  std::vector gpu_only_alloc_col(size_t count);
  void gpu_only_free_cols(std::vector cols);
  void gpu_background_flush();

  bool alloc_col(std::vector>>& k_entries, std::vector>>& v_entries, size_t at);
  void evict_cols();
  void flush_col(size_t at);
  std::vector> try_lock_col(size_t at);
  void free_col(size_t at);

  std::vector> basic_request(cudaMemcpyKind direction, std::function callback);
  void submit_requests(std::vector> reqs);
  void append_col_to_request(std::vector>& reqs, std::vector>>& k_handles,
                             std::vector>>& v_handles, size_t at);
  void debug();
};

}  // namespace kvc2

#endif
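
// ---------------------------------------------------------------------------
// Illustrative call sequence (not part of the kvc2 sources). The element types
// of the containers above were not recoverable here, so `Entry`, `Handle`, and
// the request type below are hypothetical placeholders; the flow is an
// assumption read off the declared method names: allocate a column, build copy
// requests for it, submit them, then flush and free the column.
//
//   kvc2::GPUPageCacheConfig config = /* load or construct a config */;
//   kvc2::GPUPageCache cache(config);
//
//   // Per-layer K/V entries and handles for the column being brought onto the GPU.
//   std::vector<std::vector<std::shared_ptr<Entry>>> k_entries, v_entries;
//   std::vector<std::vector<std::shared_ptr<Handle>>> k_handles, v_handles;
//   size_t col = /* chosen column index */;
//
//   if (cache.alloc_col(k_entries, v_entries, col)) {
//     auto reqs = cache.basic_request(cudaMemcpyHostToDevice,
//                                     [] { /* completion callback */ });
//     cache.append_col_to_request(reqs, k_handles, v_handles, col);
//     cache.submit_requests(std::move(reqs));
//     // ... use the column on the GPU ...
//     cache.flush_col(col);  // write the column back before releasing it
//     cache.free_col(col);
//   }
// ---------------------------------------------------------------------------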