# data.py
1
# SPDX-License-Identifier: Apache-2.0
2
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
3
from dataclasses import dataclass
4
from typing import TYPE_CHECKING, Any, Literal, TypeAlias
5

6
import torch
7
from typing_extensions import NotRequired, TypedDict
8

9
10
from vllm.sampling_params import SamplingParams

11
if TYPE_CHECKING:
    # Imported for static analysis only, so the multimodal package is not
    # imported by this module at runtime.
    from vllm.multimodal.inputs import (
        MultiModalDataDict,
        MultiModalEncDecInputs,
        MultiModalInputs,
        MultiModalUUIDDict,
    )
else:
    # Runtime placeholders so that the annotations and type aliases defined
    # below can still be evaluated without the real types.
    MultiModalDataDict = object
    MultiModalEncDecInputs = object
    MultiModalInputs = object
    MultiModalUUIDDict = object
23
24


25
26
27
28
29
30
31
# Inputs to LLM API
class _PromptOptions(TypedDict):
    """
    Additional options available to all
    [`SingletonPrompt`][vllm.inputs.data.SingletonPrompt].
    """

    multi_modal_data: NotRequired[MultiModalDataDict | None]
    """
    Optional multi-modal data to pass to the model,
    if the model supports it.
    """

    mm_processor_kwargs: NotRequired[dict[str, Any] | None]
    """
    Optional multi-modal processor kwargs to be forwarded to the
    multimodal input mapper & processor. Note that if multiple modalities
    have registered mappers etc for the model being considered, we attempt
    to pass the mm_processor_kwargs to each of them.
    """

    multi_modal_uuids: NotRequired[MultiModalUUIDDict]
    """
    Optional user-specified UUIDs for multimodal items, mapped by modality.
    Lists must match the number of items per modality and may contain `None`.
    For `None` entries, the hasher will compute IDs automatically; non-None
    entries override the default hashes for caching, and MUST be unique per
    multimodal item.
    """

    cache_salt: NotRequired[str]
    """
    Optional cache salt to be used for prefix caching.
    """

60

61
class TextPrompt(_PromptOptions):
    """Schema for a text prompt."""

    prompt: str
    """The input text to be tokenized before passing to the model."""


68
class TokensPrompt(_PromptOptions):
    """Schema for a tokenized prompt."""

    prompt_token_ids: list[int]
    """A list of token IDs to pass to the model."""

    prompt: NotRequired[str]
    """The prompt text corresponding to the token IDs, if available."""

    token_type_ids: NotRequired[list[int]]
    """A list of token type IDs to pass to the cross encoder model."""

80

81
class EmbedsPrompt(_PromptOptions):
    """Schema for a prompt provided via token embeddings."""

    prompt_embeds: torch.Tensor
    """The embeddings of the prompt."""

    prompt: NotRequired[str]
    """The prompt text corresponding to the token embeddings, if available."""

90

91
92
93
94
95
DecoderOnlyPrompt: TypeAlias = (
    str | TextPrompt | list[int] | TokensPrompt | EmbedsPrompt
)
"""
Schema of a prompt for a decoder-only model:

- A text prompt (string or [`TextPrompt`][vllm.inputs.data.TextPrompt])
- A tokenized prompt (list of token IDs, or
  [`TokensPrompt`][vllm.inputs.data.TokensPrompt])
- An embeddings prompt ([`EmbedsPrompt`][vllm.inputs.data.EmbedsPrompt])

For encoder-decoder models, passing a singleton prompt is shorthand for passing
`ExplicitEncoderDecoderPrompt(encoder_prompt=prompt, decoder_prompt=None)`.
"""
105
106


107
108
109
110
111
112
113
EncoderPrompt: TypeAlias = str | TextPrompt | list[int] | TokensPrompt
"""
Schema of a prompt for the encoder part of an encoder-decoder model:

- A text prompt (string or [`TextPrompt`][vllm.inputs.data.TextPrompt])
- A tokenized prompt (list of token IDs, or
  [`TokensPrompt`][vllm.inputs.data.TokensPrompt])
"""

116

117
DecoderPrompt: TypeAlias = str | TextPrompt | list[int] | TokensPrompt
"""
Schema of a prompt for the decoder part of an encoder-decoder model:

- A text prompt (string or [`TextPrompt`][vllm.inputs.data.TextPrompt])
- A tokenized prompt (list of token IDs, or
  [`TokensPrompt`][vllm.inputs.data.TokensPrompt])

Note:
    Multi-modal inputs are not supported for decoder prompts.
"""
128

129

130
class ExplicitEncoderDecoderPrompt(TypedDict):
    """
    Schema for a pair of encoder and decoder singleton prompts.

    Note:
        This schema is not valid for decoder-only models.
    """

    encoder_prompt: EncoderPrompt
    """The prompt for the encoder part of the model."""

    decoder_prompt: DecoderPrompt | None
    """
    The prompt for the decoder part of the model.

    Passing `None` will cause the prompt to be inferred automatically.
    """
147

148

149
EncoderDecoderPrompt: TypeAlias = EncoderPrompt | ExplicitEncoderDecoderPrompt
"""
Schema for a prompt for an encoder-decoder model.

You can pass a singleton encoder prompt, in which case the decoder prompt is
considered to be `None` (i.e., infer automatically).
"""


SingletonPrompt: TypeAlias = DecoderOnlyPrompt | EncoderPrompt | DecoderPrompt
"""
Schema for a single prompt. This is as opposed to a data structure
which encapsulates multiple prompts, such as
[`ExplicitEncoderDecoderPrompt`][vllm.inputs.data.ExplicitEncoderDecoderPrompt].
"""


166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
PromptType: TypeAlias = DecoderOnlyPrompt | EncoderDecoderPrompt
"""
Schema covering every kind of prompt, independent of the model type.

Most [`LLM`][vllm.entrypoints.llm.LLM] APIs accept their input in this format.
"""


class DataPrompt(_PromptOptions):
    """
    Generic inputs which IO processor plugins convert into a
    [`PromptType`][vllm.inputs.data.PromptType].
    """

    data: Any
    """The input payload; this schema places no constraint on its type."""

    data_format: str
    """The format of the input data, given as a string."""


# Outputs of processor
class _InputOptions(TypedDict):
    """
    Additional options available to all input types.
    """

    cache_salt: NotRequired[str]
    """Optional cache salt to be used for prefix caching."""


class TokenInputs(_InputOptions):
    """Represents token-based inputs."""

    type: Literal["token"]
    """The type of inputs."""

    prompt_token_ids: list[int]
    """The token IDs of the prompt."""

206

207
def token_inputs(
    prompt_token_ids: list[int],
    cache_salt: str | None = None,
) -> TokenInputs:
    """Construct [`TokenInputs`][vllm.inputs.data.TokenInputs] from optional
    values.

    Args:
        prompt_token_ids: The token IDs of the prompt.
        cache_salt: Optional cache salt to be used for prefix caching.

    Returns:
        A `TokenInputs` dict with `type="token"`; the `cache_salt` key is
        present only when a salt was provided.
    """
    inputs = TokenInputs(type="token", prompt_token_ids=prompt_token_ids)

    # `cache_salt` is a NotRequired key, so it is omitted rather than set to
    # None when no salt is given.
    if cache_salt is not None:
        inputs["cache_salt"] = cache_salt

    return inputs


221
class EmbedsInputs(_InputOptions):
    """Represents embeddings-based inputs."""

    type: Literal["embeds"]
    """The type of inputs."""

    prompt_embeds: torch.Tensor
    """The embeddings of the prompt."""


231
232
def embeds_inputs(
    prompt_embeds: torch.Tensor,
    cache_salt: str | None = None,
) -> EmbedsInputs:
    """Construct [`EmbedsInputs`][vllm.inputs.data.EmbedsInputs] from optional
    values.

    Args:
        prompt_embeds: The embeddings of the prompt.
        cache_salt: Optional cache salt to be used for prefix caching.

    Returns:
        An `EmbedsInputs` dict with `type="embeds"`; the `cache_salt` key is
        present only when a salt was provided.
    """
    inputs = EmbedsInputs(type="embeds", prompt_embeds=prompt_embeds)

    # `cache_salt` is a NotRequired key, so it is omitted rather than set to
    # None when no salt is given.
    if cache_salt is not None:
        inputs["cache_salt"] = cache_salt

    return inputs


245
DecoderOnlyInputs: TypeAlias = TokenInputs | EmbedsInputs | MultiModalInputs
"""
A processed prompt from
[`InputPreprocessor`][vllm.inputs.preprocess.InputPreprocessor]
which can be passed to
[`InputProcessor`][vllm.v1.engine.input_processor.InputProcessor]
for decoder-only models.
"""


EncoderInputs: TypeAlias = TokenInputs | MultiModalEncDecInputs
"""
The processed form of an encoder prompt, as produced by
[`InputPreprocessor`][vllm.inputs.preprocess.InputPreprocessor].
For encoder-decoder models, it can be passed to
[`InputProcessor`][vllm.v1.engine.input_processor.InputProcessor].
"""


DecoderInputs: TypeAlias = TokenInputs | MultiModalInputs
"""
A processed decoder prompt from
[`InputPreprocessor`][vllm.inputs.preprocess.InputPreprocessor]
which can be passed to
[`InputProcessor`][vllm.v1.engine.input_processor.InputProcessor]
for encoder-decoder models.
"""


275
class EncoderDecoderInputs(TypedDict):
    """
    A processed pair of encoder and decoder singleton prompts from
    [`InputPreprocessor`][vllm.inputs.preprocess.InputPreprocessor]
    which can be passed to
    [`InputProcessor`][vllm.v1.engine.input_processor.InputProcessor]
    for encoder-decoder models.
    """

    encoder: EncoderInputs
    """The inputs for the encoder portion."""

    decoder: DecoderInputs
    """The inputs for the decoder portion."""
289

290

291
ProcessorInputs: TypeAlias = DecoderOnlyInputs | EncoderDecoderInputs
"""
A processed prompt from
[`InputPreprocessor`][vllm.inputs.preprocess.InputPreprocessor]
which can be passed to
[`InputProcessor`][vllm.v1.engine.input_processor.InputProcessor].
"""
298

299

300
SingletonInputs: TypeAlias = DecoderOnlyInputs | MultiModalEncDecInputs
"""
A single processed input: either any of the
[`DecoderOnlyInputs`][vllm.inputs.data.DecoderOnlyInputs] variants or a
`MultiModalEncDecInputs`.
"""


@dataclass
class StreamingInput:
    """One input item of a streaming generation request.

    Intended for use with generate() in multi-turn streaming sessions, where
    the inputs are supplied through an async generator.
    """

    # The prompt for this input, in any form accepted by `PromptType`.
    prompt: PromptType
    # Sampling parameters for this input; defaults to None when not given.
    sampling_params: SamplingParams | None = None