pooling_params.py 8.08 KB
Newer Older
1
# SPDX-License-Identifier: Apache-2.0
2
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
3

4
from copy import deepcopy
5
from typing import Any
6

7
import msgspec
8

9
from vllm.config import ModelConfig, PoolerConfig
10
from vllm.logger import init_logger
11
from vllm.sampling_params import RequestOutputKind
12
from vllm.tasks import PoolingTask
13

14
15
logger = init_logger(__name__)

16

17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
class LateInteractionParams(
    msgspec.Struct,
    omit_defaults=True,  # type: ignore[call-arg]
    array_like=True,
):  # type: ignore[call-arg]
    """Metadata for worker-side late-interaction scoring.

    The struct is declared with ``array_like=True``, so the order of the
    fields below is part of its serialized form; do not reorder them.

    Attributes:
        mode: Which step of late-interaction scoring the request performs:

            - "cache_query": cache query token embeddings
            - "score_doc": score a document against a cached query.
        query_key: stable key used for both DP routing and worker cache lookup.
        query_uses: expected number of document requests
            (presumably the requests that will consume the cached query;
            the consumer is not visible in this file).
    """

    # Required: one of the mode strings listed in the class docstring.
    mode: str
    # Required: identifies the cached query.
    query_key: str
    # Optional: left as None when no expected count is supplied.
    query_uses: int | None = None


37
class PoolingParams(
    msgspec.Struct,
    omit_defaults=True,  # type: ignore[call-arg]
    array_like=True,
):  # type: ignore[call-arg]
    """API parameters for pooling models.

    The struct is declared with ``array_like=True``, so the order of the
    fields below is part of its serialized form; do not reorder them.

    Attributes:
        use_activation: Whether to apply activation function to the pooler outputs.
            `None` uses the pooler's default, which is `True` in most cases.
        dimensions: Reduce the dimensions of embeddings
            if the model supports matryoshka representation.
        step_tag_id: Step-pooling option. Only accepted when the pooler's
            `tok_pooling_type` is `"STEP"`; otherwise `verify` rejects it.
        returned_token_ids: Step-pooling option, subject to the same
            restriction as `step_tag_id`.
        task: The pooling task this request is for (internal).
        requires_token_ids: Internal flag; not read inside this class.
        skip_reading_prefix_cache: Internal. When left as `None`, `verify`
            sets it to `True` for the `"plugin"`, `"token_embed"` and
            `"token_classify"` tasks and to `False` for the other built-in
            tasks.
        late_interaction_params: Optional late-interaction metadata (internal).
        extra_kwargs: Optional free-form extra arguments (internal); not
            interpreted inside this class.
        output_kind: Must be `RequestOutputKind.FINAL_ONLY`; this is checked
            in `__post_init__`.
    """

    # --8<-- [start:common-pooling-params]
    use_activation: bool | None = None
    # --8<-- [end:common-pooling-params]

    ## for embeddings models
    # --8<-- [start:embed-pooling-params]
    dimensions: int | None = None
    # --8<-- [end:embed-pooling-params]

    ## for step pooling models
    step_tag_id: int | None = None
    returned_token_ids: list[int] | None = None

    ## Internal use only
    task: PoolingTask | None = None
    requires_token_ids: bool = False
    skip_reading_prefix_cache: bool | None = None
    late_interaction_params: LateInteractionParams | None = None
    extra_kwargs: dict[str, Any] | None = None
    output_kind: RequestOutputKind = RequestOutputKind.FINAL_ONLY

    @property
    def all_parameters(self) -> list[str]:
        """Names of every user-facing parameter checked per task."""
        return ["dimensions", "use_activation"]

    @property
    def valid_parameters(self) -> dict[str, list[str]]:
        """Map each built-in pooling task to the parameters it accepts.

        Tasks that are absent from this mapping are not validated by
        `verify`.
        """
        return {
            "embed": ["dimensions", "use_activation"],
            "classify": ["use_activation"],
            "token_embed": ["dimensions", "use_activation"],
            "token_classify": ["use_activation"],
        }

    def clone(self) -> "PoolingParams":
        """Returns a deep copy of the PoolingParams instance."""
        return deepcopy(self)

    def verify(self, model_config: ModelConfig) -> None:
        """Fill in defaults and validate these parameters for a model.

        Mutates `self` in place: unset parameters are filled from the
        model's pooler config and from per-task defaults.

        Args:
            model_config: Configuration of the model serving the request.

        Raises:
            ValueError: If a parameter is set that the task, the pooler or
                the model does not support.
        """
        # plugin task uses io_processor.parse_request to verify inputs,
        # skipping PoolingParams verify
        if self.task == "plugin":
            if self.skip_reading_prefix_cache is None:
                self.skip_reading_prefix_cache = True
            return

        # skipping verify, let plugins configure and validate pooling params
        if self.task not in self.valid_parameters:
            return

        # NOTE: Task validation needs to be done against the model instance,
        # which is not available in model config. So, it's not included
        # in this method
        self._merge_default_parameters(model_config)
        self._set_default_parameters(model_config)
        self._verify_valid_parameters()

    def _merge_default_parameters(self, model_config: ModelConfig) -> None:
        """Fill unset parameters from the model's pooler config.

        Values already set on the request take precedence over the pooler
        config. If the model has no pooler config, nothing in this method
        is applied (including the prefix-cache default and the step-pooling
        check).
        """
        pooler_config = model_config.pooler_config
        if pooler_config is None:
            return

        assert self.task is not None, "task must be set"
        valid_parameters = self.valid_parameters[self.task]

        for k in valid_parameters:
            if getattr(pooler_config, k, None) is None:
                continue

            if getattr(self, k, None) is None:
                setattr(self, k, getattr(pooler_config, k))

        if self.skip_reading_prefix_cache is None:
            # If prefix caching is enabled,
            # the output of all pooling may be less than n_prompt_tokens,
            # we need to skip reading cache at this request.
            self.skip_reading_prefix_cache = self.task in (
                "token_embed",
                "token_classify",
            )

        self._verify_step_pooling(pooler_config, valid_parameters)

    def _verify_step_pooling(
        self,
        pooler_config: PoolerConfig,
        valid_parameters: list[str],
    ) -> None:
        """Validate or default the step-pooling parameters.

        Args:
            pooler_config: The model's pooler configuration.
            valid_parameters: Parameters accepted by the current task; only
                used to build the error message.

        Raises:
            ValueError: If a step-pooling parameter is set while the pooler
                is not using `"STEP"` token pooling.
        """
        step_pooling_parameters = ["step_tag_id", "returned_token_ids"]
        if pooler_config.tok_pooling_type != "STEP":
            invalid_parameters = [
                k for k in step_pooling_parameters if getattr(self, k, None) is not None
            ]

            if invalid_parameters:
                raise ValueError(
                    f"Task {self.task} only supports {valid_parameters} "
                    "parameters, does not support "
                    f"{invalid_parameters} parameters"
                )
        else:
            # Request values win; fall back to the pooler config otherwise.
            for k in step_pooling_parameters:
                if getattr(pooler_config, k, None) is None:
                    continue

                if getattr(self, k, None) is None:
                    setattr(self, k, getattr(pooler_config, k))

    def _set_default_parameters(self, model_config: ModelConfig) -> None:
        """Apply per-task defaults and validate `dimensions`.

        Raises:
            ValueError: If `dimensions` is set for a model that is not
                matryoshka-capable, is not one of the model's declared
                matryoshka dimensions, or is smaller than 1; or if the task
                is not one of the built-in pooling tasks.
        """
        if self.task in ["embed", "token_embed"]:
            if self.use_activation is None:
                self.use_activation = True

            if self.dimensions is not None:
                if not model_config.is_matryoshka:
                    raise ValueError(
                        f'Model "{model_config.served_model_name}" does not '
                        "support matryoshka representation, "
                        "changing output dimensions will lead to poor results."
                    )

                mds = model_config.matryoshka_dimensions
                if mds is not None:
                    # The model declares an explicit list of allowed sizes.
                    if self.dimensions not in mds:
                        raise ValueError(
                            f"Model {model_config.served_model_name!r} "
                            f"only supports {str(mds)} matryoshka dimensions, "
                            "use other output dimensions will "
                            "lead to poor results."
                        )
                elif self.dimensions < 1:
                    # No explicit list: only require a positive size.
                    raise ValueError("Dimensions must be greater than 0")

        elif self.task in ["classify", "token_classify"]:
            if self.use_activation is None:
                self.use_activation = True
        else:
            raise ValueError(f"Unknown pooling task: {self.task!r}")

    def _verify_valid_parameters(self) -> None:
        """Reject parameters that are set but not accepted by the task.

        Raises:
            ValueError: If any parameter from `all_parameters` that the
                current task does not accept has a non-`None` value.
        """
        assert self.task is not None, "task must be set"
        valid_parameters = self.valid_parameters[self.task]
        invalid_parameters = [
            k
            for k in self.all_parameters
            if k not in valid_parameters and getattr(self, k, None) is not None
        ]

        if invalid_parameters:
            raise ValueError(
                f"Task {self.task!r} only supports {valid_parameters} "
                "parameters, does not support "
                f"{invalid_parameters} parameters"
            )

    def __repr__(self) -> str:
        """Return a debug representation (omits `output_kind`)."""
        return (
            "PoolingParams("
            f"task={self.task}, "
            f"dimensions={self.dimensions}, "
            f"use_activation={self.use_activation}, "
            f"step_tag_id={self.step_tag_id}, "
            f"returned_token_ids={self.returned_token_ids}, "
            f"requires_token_ids={self.requires_token_ids}, "
            f"skip_reading_prefix_cache={self.skip_reading_prefix_cache}, "
            f"late_interaction_params={self.late_interaction_params}, "
            f"extra_kwargs={self.extra_kwargs})"
        )

    def __post_init__(self) -> None:
        """Check the construction-time invariant on `output_kind`."""
        assert self.output_kind == RequestOutputKind.FINAL_ONLY, (
            "For pooling output_kind has to be FINAL_ONLY"
        )