Unverified commit 23b1306c, authored by PanZezhong1725 and committed by GitHub
Browse files

Merge pull request #165 from InfiniTensor/issue/125-bind

issue/125 add pybind for paged attention cache config
parents 8cf62baa 2390104c
...@@ -30,6 +30,23 @@ inline void bind_cache(py::module &m) { ...@@ -30,6 +30,23 @@ inline void bind_cache(py::module &m) {
.def("__repr__", [](const infinilm::cache::StaticKVCacheConfig &) { .def("__repr__", [](const infinilm::cache::StaticKVCacheConfig &) {
return "<StaticKVCacheConfig>"; return "<StaticKVCacheConfig>";
}); });
py::class_<infinilm::cache::PagedKVCacheConfig,
infinilm::cache::CacheConfig,
std::shared_ptr<infinilm::cache::PagedKVCacheConfig>>(m, "PagedKVCacheConfig")
.def(
py::init<size_t, size_t>(),
py::arg("max_kv_memory_bytes"),
py::arg("block_size") = 16)
.def(
"max_kv_memory_bytes",
&infinilm::cache::PagedKVCacheConfig::max_kv_memory_bytes)
.def(
"block_size",
&infinilm::cache::PagedKVCacheConfig::block_size)
.def("__repr__", [](const infinilm::cache::PagedKVCacheConfig &) {
return "<PagedKVCacheConfig>";
});
} }
} // namespace infinilm::cache } // namespace infinilm::cache
\ No newline at end of file
...@@ -11,3 +11,16 @@ class CacheConfig(_infinilm.CacheConfig): ...@@ -11,3 +11,16 @@ class CacheConfig(_infinilm.CacheConfig):
class StaticKVCacheConfig(CacheConfig, _infinilm.StaticKVCacheConfig): class StaticKVCacheConfig(CacheConfig, _infinilm.StaticKVCacheConfig):
def __init__(self, max_batch_size: int = 1, max_cache_len: int = 0): def __init__(self, max_batch_size: int = 1, max_cache_len: int = 0):
_infinilm.StaticKVCacheConfig.__init__(self, max_batch_size, max_cache_len) _infinilm.StaticKVCacheConfig.__init__(self, max_batch_size, max_cache_len)
class PagedKVCacheConfig(CacheConfig, _infinilm.PagedKVCacheConfig):
    """Python-side wrapper for the native ``_infinilm.PagedKVCacheConfig``.

    Args:
        max_kv_memory_bytes: Forwarded unchanged to the native constructor
            as its first positional argument.
        block_size: Forwarded unchanged to the native constructor as its
            second positional argument. Defaults to 16.
    """

    def __init__(self, max_kv_memory_bytes: int, block_size: int = 16):
        # Call the native base initializer explicitly (not via super()) so the
        # pybind-backed object is constructed with exactly these two values.
        _infinilm.PagedKVCacheConfig.__init__(self, max_kv_memory_bytes, block_size)
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment