issue/125 add pybind for paged attention cache config

2390104c · PanZezhong · 8cf62baa · 2390104c · 2390104c
Commit 2390104c authored Dec 26, 2025 by PanZezhong
Hide whitespace changes
Inline Side-by-side

Showing with 31 additions and 1 deletion

csrc/pybind11/cache/cache.hpp csrc/pybind11/cache/cache.hpp +18 -1

python/infinilm/cache/cache.py python/infinilm/cache/cache.py +13 -0

No files found.
--- a/csrc/pybind11/cache/cache.hpp
+++ b/csrc/pybind11/cache/cache.hpp
@@ -30,6 +30,23 @@ inline void bind_cache(py::module &m) {
        .def("__repr__", [](const infinilm::cache::StaticKVCacheConfig &) {
            return "<StaticKVCacheConfig>";
        });
+
+    py::class_<infinilm::cache::PagedKVCacheConfig,
+               infinilm::cache::CacheConfig,
+               std::shared_ptr<infinilm::cache::PagedKVCacheConfig>>(m, "PagedKVCacheConfig")
+        .def(
+            py::init<size_t, size_t>(),
+            py::arg("max_kv_memory_bytes"),
+            py::arg("block_size") = 16)
+        .def(
+            "max_kv_memory_bytes",
+            &infinilm::cache::PagedKVCacheConfig::max_kv_memory_bytes)
+        .def(
+            "block_size",
+            &infinilm::cache::PagedKVCacheConfig::block_size)
+        .def("__repr__", [](const infinilm::cache::PagedKVCacheConfig &) {
+            return "<PagedKVCacheConfig>";
+        });
 }

-} // namespace infinilm::cache
\ No newline at end of file
+} // namespace infinilm::cache
--- a/python/infinilm/cache/cache.py
+++ b/python/infinilm/cache/cache.py
@@ -11,3 +11,16 @@ class CacheConfig(_infinilm.CacheConfig):
 class StaticKVCacheConfig(CacheConfig, _infinilm.StaticKVCacheConfig):
    def __init__(self, max_batch_size: int = 1, max_cache_len: int = 0):
        _infinilm.StaticKVCacheConfig.__init__(self, max_batch_size, max_cache_len)
+
+
+class PagedKVCacheConfig(CacheConfig, _infinilm.PagedKVCacheConfig):  # Python wrapper over the bound _infinilm.PagedKVCacheConfig; also derives from the Python-side CacheConfig
+    def __init__(  # builds the config by forwarding both arguments to the bound class's __init__
+        self,
+        max_kv_memory_bytes: int,  # required; presumably the KV-cache memory budget in bytes (inferred from the name) - TODO confirm
+        block_size: int = 16,  # same default (16) as py::arg("block_size") in the pybind11 binding
+    ):
+        _infinilm.PagedKVCacheConfig.__init__(  # explicit call to the bound base initializer, mirroring StaticKVCacheConfig in this module
+            self,
+            max_kv_memory_bytes,
+            block_size,
+        )