# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project

import uuid
from dataclasses import field
6
from typing import Any, Literal, get_args
7
8

from vllm.config.utils import config
9
from vllm.utils.hashing import safe_hash
10
11
12
13
14
15
16
17
18
19

# String literals accepted for `KVTransferConfig.kv_role`. "kv_both" is listed
# in both aliases, so an instance with that role matches the producer check
# and the consumer check.
KVProducer = Literal["kv_producer", "kv_both"]
KVConsumer = Literal["kv_consumer", "kv_both"]
# Combination of the two aliases above; `get_args(KVRole)` is what the config
# validates `kv_role` against.
KVRole = Literal[KVProducer, KVConsumer]


@config
class KVTransferConfig:
    """Configuration for distributed KV cache transfer."""

    kv_connector: str | None = None
    """The KV connector for vLLM to transmit KV caches between vLLM instances."""

    engine_id: str | None = None
    """The engine id for KV transfers. A random UUID4 string is generated in
    `__post_init__` when this is left unset."""

    kv_buffer_device: str = "cuda"
    """The device used by kv connector to buffer the KV cache. Choices are
    'cuda' and 'cpu'."""

    kv_buffer_size: float = 1e9
    """The buffer size for TorchDistributedConnector. Measured in number of
    bytes. Recommended value: 1e9 (about 1GB)."""

    kv_role: KVRole | None = None
    """Whether this vLLM instance produces, consumes KV cache, or both. Choices
    are 'kv_producer', 'kv_consumer', and 'kv_both'."""

    kv_rank: int | None = None
    """The rank of this vLLM instance in the KV cache transfer. Typical value:
    0 for prefill instance, 1 for decode instance.
    Currently only 1P1D is supported."""

    kv_parallel_size: int = 1
    """The number of parallel instances for KV cache transfer. For
    P2pNcclConnector, this should be 2."""

    kv_ip: str = "127.0.0.1"
    """The KV connector ip, used to build distributed connection."""

    kv_port: int = 14579
    """The KV connector port, used to build distributed connection."""

    kv_connector_extra_config: dict[str, Any] = field(default_factory=dict)
    """Any extra config that the connector may need."""

    kv_connector_module_path: str | None = None
    """The Python module path to dynamically load the KV connector from.
    Only supported in V1."""

    enable_permute_local_kv: bool = False
    """Experimental feature flag to enable HND to NHD KV Transfer."""

    kv_load_failure_policy: Literal["recompute", "fail"] = "recompute"
    """Policy for handling KV cache load failures.
    'recompute': reschedule the request to recompute failed blocks (default)
    'fail': immediately fail the request with an error finish reason"""

    def compute_hash(self) -> str:
        """
        WARNING: Whenever a new field is added to this config,
        ensure that it is included in the factors list if
        it affects the computation graph.

        Provide a hash that uniquely identifies all the configs
        that affect the structure of the computation
        graph from input ids/embeddings to the final hidden states,
        excluding anything before input ids/embeddings and after
        the final hidden states.

        Returns:
            The hex digest of the (currently empty) factors list, so the
            value is the same for every instance of this config.
        """
        # no factors to consider.
        # this config will not affect the computation graph.
        factors: list[Any] = []
        hash_str = safe_hash(str(factors).encode(), usedforsecurity=False).hexdigest()
        return hash_str

    def __post_init__(self) -> None:
        """Fill in defaults and validate the role/connector combination.

        Raises:
            ValueError: If `kv_role` is set to a value outside `KVRole`, or
                if `kv_connector` is set while `kv_role` is left as None.
        """
        # Give every instance an id even when the caller did not supply one.
        if self.engine_id is None:
            self.engine_id = str(uuid.uuid4())

        if self.kv_role is not None and self.kv_role not in get_args(KVRole):
            raise ValueError(
                f"Unsupported kv_role: {self.kv_role}. "
                f"Supported roles are {get_args(KVRole)}"
            )

        # A connector without a role is ambiguous, so reject it up front.
        if self.kv_connector is not None and self.kv_role is None:
            raise ValueError(
                "Please specify kv_role when kv_connector "
                f"is set, supported roles are {get_args(KVRole)}"
            )

    @property
    def is_kv_transfer_instance(self) -> bool:
        """True when a connector is set and `kv_role` is any valid role."""
        return self.kv_connector is not None and self.kv_role in get_args(KVRole)

    @property
    def is_kv_producer(self) -> bool:
        """True when a connector is set and `kv_role` is a producer role
        ('kv_producer' or 'kv_both')."""
        return self.kv_connector is not None and self.kv_role in get_args(KVProducer)

    @property
    def is_kv_consumer(self) -> bool:
        """True when a connector is set and `kv_role` is a consumer role
        ('kv_consumer' or 'kv_both')."""
        return self.kv_connector is not None and self.kv_role in get_args(KVConsumer)

    def get_from_extra_config(self, key: str, default: Any) -> Any:
        """Look up `key` in `kv_connector_extra_config`.

        Args:
            key: Name of the extra-config entry to read.
            default: Value returned when `key` is not present.

        Returns:
            The stored value for `key`, or `default` if the key is missing.
        """
        return self.kv_connector_extra_config.get(key, default)