interfaces_base.py 4.5 KB
Newer Older
1
2
# SPDX-License-Identifier: Apache-2.0

3
4
from typing import (TYPE_CHECKING, Optional, Protocol, Type, Union, overload,
                    runtime_checkable)
5
6
7
8
9
10
11
12
13

import torch
import torch.nn as nn
from typing_extensions import TypeIs, TypeVar

from vllm.logger import init_logger
from vllm.utils import supports_kw

if TYPE_CHECKING:
14
    from vllm.config import VllmConfig
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
    from vllm.model_executor.layers.pooler import PoolerOutput
    from vllm.model_executor.layers.sampler import SamplerOutput
    from vllm.model_executor.pooling_metadata import PoolingMetadata
    from vllm.model_executor.sampling_metadata import SamplingMetadata

logger = init_logger(__name__)

# The type of hidden states
# Currently, T = torch.Tensor for all models except for Medusa
# which has T = List[torch.Tensor]
T = TypeVar("T", default=torch.Tensor)
T_co = TypeVar("T_co", default=torch.Tensor, covariant=True)

# NOTE: Unlike those in `interfaces.py`, we don't define `ClassVar` tags
# for the base interfaces to avoid breaking OOT registration for existing models
# that don't inherit from the base interface classes


@runtime_checkable
34
class VllmModel(Protocol[T_co]):
35
    """The interface required for all models in vLLM."""
36
37
38

    def __init__(
        self,
39
40
        vllm_config: "VllmConfig",
        prefix: str = "",
41
42
43
44
45
46
47
48
49
50
51
52
53
    ) -> None:
        ...

    def forward(
        self,
        input_ids: torch.Tensor,
        positions: torch.Tensor,
    ) -> T_co:
        ...


def _check_vllm_model_init(model: Union[Type[object], object]) -> bool:
    model_init = model.__init__
54
    return supports_kw(model_init, "vllm_config")
55
56
57
58
59
60
61


def _check_vllm_model_forward(model: Union[Type[object], object]) -> bool:
    model_forward = getattr(model, "forward", None)
    if not callable(model_forward):
        return False

62
    vllm_kws = ("input_ids", "positions")
63
64
65
66
67
68
69
    missing_kws = tuple(kw for kw in vllm_kws
                        if not supports_kw(model_forward, kw))

    if missing_kws and (isinstance(model, type)
                        and issubclass(model, nn.Module)):
        logger.warning(
            "The model (%s) is missing "
70
            "vLLM-specific keywords from its `forward` method: %s",
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
            model,
            missing_kws,
        )

    return len(missing_kws) == 0


@overload
def is_vllm_model(model: Type[object]) -> TypeIs[Type[VllmModel]]:
    ...


@overload
def is_vllm_model(model: object) -> TypeIs[VllmModel]:
    ...


def is_vllm_model(
    model: Union[Type[object], object],
) -> Union[TypeIs[Type[VllmModel]], TypeIs[VllmModel]]:
    return _check_vllm_model_init(model) and _check_vllm_model_forward(model)


@runtime_checkable
95
class VllmModelForTextGeneration(VllmModel[T], Protocol[T]):
96
    """The interface required for all generative models in vLLM."""
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140

    def compute_logits(
        self,
        hidden_states: T,
        sampling_metadata: "SamplingMetadata",
    ) -> Optional[T]:
        """Return `None` if TP rank > 0."""
        ...

    def sample(
        self,
        logits: T,
        sampling_metadata: "SamplingMetadata",
    ) -> "SamplerOutput":
        """Only called on TP rank 0."""
        ...


@overload
def is_text_generation_model(
        model: Type[object]) -> TypeIs[Type[VllmModelForTextGeneration]]:
    ...


@overload
def is_text_generation_model(
        model: object) -> TypeIs[VllmModelForTextGeneration]:
    ...


def is_text_generation_model(
    model: Union[Type[object], object],
) -> Union[TypeIs[Type[VllmModelForTextGeneration]],
           TypeIs[VllmModelForTextGeneration]]:
    if not is_vllm_model(model):
        return False

    if isinstance(model, type):
        return isinstance(model, VllmModelForTextGeneration)

    return isinstance(model, VllmModelForTextGeneration)


@runtime_checkable
141
class VllmModelForPooling(VllmModel[T], Protocol[T]):
142
    """The interface required for all pooling models in vLLM."""
143
144
145
146
147
148
149
150
151
152
153

    def pooler(
        self,
        hidden_states: T,
        pooling_metadata: "PoolingMetadata",
    ) -> "PoolerOutput":
        """Only called on TP rank 0."""
        ...


@overload
154
def is_pooling_model(model: Type[object]) -> TypeIs[Type[VllmModelForPooling]]:
155
156
157
158
    ...


@overload
159
def is_pooling_model(model: object) -> TypeIs[VllmModelForPooling]:
160
161
162
    ...


163
def is_pooling_model(
164
    model: Union[Type[object], object],
165
) -> Union[TypeIs[Type[VllmModelForPooling]], TypeIs[VllmModelForPooling]]:
166
167
168
169
    if not is_vllm_model(model):
        return False

    if isinstance(model, type):
170
        return isinstance(model, VllmModelForPooling)
171

172
    return isinstance(model, VllmModelForPooling)