__init__.py 736 Bytes
Newer Older
chenzk's avatar
chenzk committed
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
"""
KV-cache pruning (compactor-style) under ``vllm.kvprune``.

Use the standard :class:`~vllm.LLM` and pass ``compression=`` to
:meth:`~vllm.LLM.generate` with :class:`CompressionParams` when any prompt needs
``compression_ratio < 1``. The compactor ``LLMEngine`` and ``PagedKVCache`` share
weights with vLLM (no second checkpoint).

Subpackages (``attention``, ``kv_cache``, ``compression``, …) implement the compactor
engine.
"""

from vllm.kvprune.compression.compression_config import CompressionMethod
from vllm.kvprune.integration import CompressionParams

# Public names of this package; both are bound by the imports above.
__all__ = ["CompressionMethod", "CompressionParams"]