Commit 2390104c authored by PanZezhong
Browse files

issue/125 add pybind for paged attention cache config

parent 8cf62baa
......@@ -30,6 +30,23 @@ inline void bind_cache(py::module &m) {
.def("__repr__", [](const infinilm::cache::StaticKVCacheConfig &) {
return "<StaticKVCacheConfig>";
});
py::class_<infinilm::cache::PagedKVCacheConfig,
infinilm::cache::CacheConfig,
std::shared_ptr<infinilm::cache::PagedKVCacheConfig>>(m, "PagedKVCacheConfig")
.def(
py::init<size_t, size_t>(),
py::arg("max_kv_memory_bytes"),
py::arg("block_size") = 16)
.def(
"max_kv_memory_bytes",
&infinilm::cache::PagedKVCacheConfig::max_kv_memory_bytes)
.def(
"block_size",
&infinilm::cache::PagedKVCacheConfig::block_size)
.def("__repr__", [](const infinilm::cache::PagedKVCacheConfig &) {
return "<PagedKVCacheConfig>";
});
}
} // namespace infinilm::cache
\ No newline at end of file
} // namespace infinilm::cache
......@@ -11,3 +11,16 @@ class CacheConfig(_infinilm.CacheConfig):
class StaticKVCacheConfig(CacheConfig, _infinilm.StaticKVCacheConfig):
    """Python-side wrapper around ``_infinilm.StaticKVCacheConfig``.

    Args:
        max_batch_size: Forwarded as the first positional argument to the
            native constructor. Defaults to 1.
        max_cache_len: Forwarded as the second positional argument to the
            native constructor. Defaults to 0.
    """

    def __init__(self, max_batch_size: int = 1, max_cache_len: int = 0):
        # Call the native base initializer explicitly (not via super()),
        # passing the arguments positionally.
        native_base = _infinilm.StaticKVCacheConfig
        native_base.__init__(self, max_batch_size, max_cache_len)
class PagedKVCacheConfig(CacheConfig, _infinilm.PagedKVCacheConfig):
    """Python-side wrapper around ``_infinilm.PagedKVCacheConfig``.

    Args:
        max_kv_memory_bytes: Forwarded as the first positional argument to
            the native constructor. Required.
        block_size: Forwarded as the second positional argument to the
            native constructor. Defaults to 16.
    """

    def __init__(self, max_kv_memory_bytes: int, block_size: int = 16):
        # Call the native base initializer explicitly (not via super()),
        # passing the arguments positionally.
        native_base = _infinilm.PagedKVCacheConfig
        native_base.__init__(self, max_kv_memory_bytes, block_size)
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment