# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project

import uuid
from dataclasses import field
from typing import Any, Literal, get_args

from pydantic.dataclasses import dataclass

from vllm.config.utils import config
from vllm.utils.hashing import safe_hash


# Role values for which an instance counts as a KV producer
# (checked by `KVTransferConfig.is_kv_producer`).
KVProducer = Literal["kv_producer", "kv_both"]
# Role values for which an instance counts as a KV consumer
# (checked by `KVTransferConfig.is_kv_consumer`).
KVConsumer = Literal["kv_consumer", "kv_both"]
# Every accepted role. `typing` flattens the nested Literals and drops the
# duplicate "kv_both", so `get_args(KVRole)` lists each role exactly once.
KVRole = Literal[KVProducer, KVConsumer]


@config
@dataclass
class KVTransferConfig:
    """Configuration for distributed KV cache transfer."""

    kv_connector: str | None = None
    """The KV connector for vLLM to transmit KV caches between vLLM instances.
    """

    engine_id: str | None = None
    """The engine id for KV transfers."""

    kv_buffer_device: str = "cuda"
    """The device used by kv connector to buffer the KV cache. Choices are
    'cuda' and 'cpu'."""

    kv_buffer_size: float = 1e9
    """The buffer size for TorchDistributedConnector. Measured in number of
    bytes. Recommended value: 1e9 (about 1GB)."""

    kv_role: KVRole | None = None
    """Whether this vLLM instance produces, consumes KV cache, or both. Choices
    are 'kv_producer', 'kv_consumer', and 'kv_both'."""

    kv_rank: int | None = None
    """The rank of this vLLM instance in the KV cache transfer. Typical value:
    0 for prefill instance, 1 for decode instance.
    Currently only 1P1D is supported."""

    kv_parallel_size: int = 1
    """The number of parallel instances for KV cache transfer. For
    P2pNcclConnector, this should be 2."""

    kv_ip: str = "127.0.0.1"
    """The KV connector ip, used to build distributed connection."""

    kv_port: int = 14579
    """The KV connector port, used to build distributed connection."""

    kv_connector_extra_config: dict[str, Any] = field(default_factory=dict)
    """any extra config that the connector may need."""

    kv_connector_module_path: str | None = None
    """The Python module path to dynamically load the KV connector from.
    Only supported in V1."""

    enable_permute_local_kv: bool = False
    """Experiment feature flag to enable HND to NHD KV Transfer"""

    def compute_hash(self) -> str:
        """Return a hash of the settings that shape the computation graph.

        WARNING: Whenever a new field is added to this config, ensure that
        it is included in the factors list if it affects the computation
        graph.

        The hash is meant to uniquely identify every config value that
        affects the structure of the computation graph from input
        ids/embeddings to the final hidden states, excluding anything
        before input ids/embeddings and after the final hidden states.

        Returns:
            Hex digest of the stringified factor list.
        """
        # Nothing in this config affects the computation graph, so the
        # factor list is deliberately left empty.
        factors: list[Any] = []
        digest = safe_hash(str(factors).encode(), usedforsecurity=False)
        return digest.hexdigest()

    def __post_init__(self) -> None:
        """Fill in a default engine id and validate the role settings.

        Raises:
            ValueError: If `kv_role` is not one of the supported roles, or
                if `kv_connector` is set while `kv_role` is left unset.
        """
        if self.engine_id is None:
            # No id supplied: generate a random one for this instance.
            self.engine_id = str(uuid.uuid4())

        supported_roles = get_args(KVRole)
        role = self.kv_role

        if role is None:
            # A connector cannot be used without knowing which role to play.
            if self.kv_connector is not None:
                raise ValueError(
                    "Please specify kv_role when kv_connector "
                    f"is set, supported roles are {supported_roles}"
                )
            return

        if role not in supported_roles:
            raise ValueError(
                f"Unsupported kv_role: {role}. "
                f"Supported roles are {supported_roles}"
            )

    @property
    def is_kv_transfer_instance(self) -> bool:
        """True when a connector is set and `kv_role` is any supported role."""
        if self.kv_connector is None:
            return False
        return self.kv_role in get_args(KVRole)

    @property
    def is_kv_producer(self) -> bool:
        """True when a connector is set and `kv_role` is a producer role."""
        if self.kv_connector is None:
            return False
        return self.kv_role in get_args(KVProducer)

    @property
    def is_kv_consumer(self) -> bool:
        """True when a connector is set and `kv_role` is a consumer role."""
        if self.kv_connector is None:
            return False
        return self.kv_role in get_args(KVConsumer)

    def get_from_extra_config(self, key, default) -> Any:
        """Look up `key` in `kv_connector_extra_config`.

        Returns `default` when the key is absent.
        """
        extra_config = self.kv_connector_extra_config
        return extra_config.get(key, default)