// cache.hpp — pybind11 bindings for the infinilm cache configuration classes.
#pragma once

#include "../../cache/cache.hpp"
#include "infinicore/tensor.hpp"
#include <pybind11/pybind11.h>
#include <pybind11/stl.h>

#include <limits>
#include <memory>

namespace py = pybind11;

namespace infinilm::cache {

/// @brief Registers the cache configuration classes on a pybind11 module.
///
/// Three Python classes are defined on @p m, each held through
/// std::shared_ptr:
///   - `CacheConfig`          — base type; only `__repr__` is bound.
///   - `StaticKVCacheConfig`  — derives from `CacheConfig`.
///   - `PagedKVCacheConfig`   — derives from `CacheConfig`.
///
/// The accessors are bound as methods (called with parentheses from Python),
/// not as properties.
///
/// @param m Module that receives the class definitions.
inline void bind_cache(py::module &m) {
    // Base type. No constructor is bound, so Python code cannot instantiate
    // it directly; it exists so the derived configs share a common base.
    py::class_<CacheConfig, std::shared_ptr<CacheConfig>>(m, "CacheConfig")
        .def("__repr__", [](const CacheConfig &) {
            return "<CacheConfig (abstract)>";
        });

    // StaticKVCacheConfig(max_batch_size=1, max_cache_len=<max of Size>).
    py::class_<StaticKVCacheConfig,
               CacheConfig,
               std::shared_ptr<StaticKVCacheConfig>>(m, "StaticKVCacheConfig")
        .def(
            py::init<infinicore::Size, infinicore::Size>(),
            py::arg("max_batch_size") = 1,
            // Default is the largest value representable by infinicore::Size.
            py::arg("max_cache_len") = std::numeric_limits<infinicore::Size>::max())
        .def(
            "max_batch_size",
            &StaticKVCacheConfig::max_batch_size)
        .def(
            "max_cache_len",
            &StaticKVCacheConfig::max_cache_len)
        .def("__repr__", [](const StaticKVCacheConfig &) {
            return "<StaticKVCacheConfig>";
        });

    // PagedKVCacheConfig(max_kv_memory_bytes, block_size=16).
    // NOTE(review): the constructor is bound with size_t here while
    // StaticKVCacheConfig uses infinicore::Size — confirm both match the
    // parameter types declared in cache.hpp.
    py::class_<PagedKVCacheConfig,
               CacheConfig,
               std::shared_ptr<PagedKVCacheConfig>>(m, "PagedKVCacheConfig")
        .def(
            py::init<size_t, size_t>(),
            py::arg("max_kv_memory_bytes"),
            py::arg("block_size") = 16)
        .def(
            "max_kv_memory_bytes",
            &PagedKVCacheConfig::max_kv_memory_bytes)
        .def(
            "block_size",
            &PagedKVCacheConfig::block_size)
        .def("__repr__", [](const PagedKVCacheConfig &) {
            return "<PagedKVCacheConfig>";
        });
}

} // namespace infinilm::cache