protocol.py 9.9 KB
Newer Older
1
# SPDX-License-Identifier: Apache-2.0
2
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
3

4
5
# Adapted from
# https://github.com/lm-sys/FastChat/blob/168ccc29d3f7edc50823016105c024fe2282732a/fastchat/protocol/openai_api_protocol.py
Zhuohan Li's avatar
Zhuohan Li committed
6
import time
7
from http import HTTPStatus
8
from typing import Any, ClassVar, Literal, TypeAlias
Zhuohan Li's avatar
Zhuohan Li committed
9

10
import regex as re
11
12
13
14
15
16
from pydantic import (
    BaseModel,
    ConfigDict,
    Field,
    model_validator,
)
Zhuohan Li's avatar
Zhuohan Li committed
17

18
from vllm.entrypoints.chat_utils import make_tool_call_id
19
from vllm.logger import init_logger
20
from vllm.sampling_params import SamplingParams
21
22
from vllm.utils import random_uuid
from vllm.utils.import_utils import resolve_obj_by_qualname
23

24
25
logger = init_logger(__name__)

Zhuohan Li's avatar
Zhuohan Li committed
26

27
class OpenAIBaseModel(BaseModel):
    # Shared base for the protocol models in this module. Unknown input keys
    # are accepted (extra="allow") and reported through a debug log line.

    # OpenAI API does allow extra fields
    model_config = ConfigDict(extra="allow")

    # Per-class cache of the recognized input keys (field names plus aliases).
    # Stays None until the first dict payload is validated for that class.
    field_names: ClassVar[set[str] | None] = None

    @model_validator(mode="wrap")
    @classmethod
    def __log_extra_fields__(cls, data, handler):
        """Run the wrapped validation, then debug-log unrecognized keys.

        Args:
            data: Raw input handed to the model. Only ``dict`` payloads are
                inspected for unknown keys.
            handler: The wrapped validation callable; it is always invoked
                first, so validation errors propagate before any logging.

        Returns:
            The value returned by ``handler(data)``, unchanged.
        """
        result = handler(data)
        if not isinstance(data, dict):
            return result

        # Read the cache from this class's own namespace only. A plain
        # `cls.field_names` lookup walks the MRO, so a model deriving from an
        # already-validated model would pick up its parent's cached set and
        # wrongly report its own additional fields as unknown.
        field_names = cls.__dict__.get("field_names")
        if field_names is None:
            # Get all class field names and their potential aliases
            field_names = set()
            for field_name, field in cls.model_fields.items():
                field_names.add(field_name)
                if alias := getattr(field, "alias", None):
                    field_names.add(alias)
            cls.field_names = field_names

        # Compare against both field names and aliases
        if any(k not in field_names for k in data):
            logger.debug(
                "The following fields were present in the request but ignored: %s",
                data.keys() - field_names,
            )
        return result
57
58


59
class ErrorInfo(OpenAIBaseModel):
    # Payload of a single error; carried in the `error` field of
    # ErrorResponse. `message`, `type` and `code` are required.
    message: str
    type: str
    # Optional; defaults to None when not supplied.
    param: str | None = None
    code: int
Zhuohan Li's avatar
Zhuohan Li committed
64
65


66
67
68
69
class ErrorResponse(OpenAIBaseModel):
    # Envelope model: its single required field wraps one ErrorInfo.
    error: ErrorInfo


70
class ModelPermission(OpenAIBaseModel):
    # Permission entry used in ModelCard.permission. Every field has a
    # default, so the model can be constructed with no arguments.

    # Generated per instance as "modelperm-" followed by random_uuid().
    id: str = Field(default_factory=lambda: f"modelperm-{random_uuid()}")
    object: str = "model_permission"
    # int(time.time()) evaluated when the instance is created.
    created: int = Field(default_factory=lambda: int(time.time()))
    allow_create_engine: bool = False
    allow_sampling: bool = True
    allow_logprobs: bool = True
    allow_search_indices: bool = False
    allow_view: bool = True
    allow_fine_tuning: bool = False
    organization: str = "*"
    group: str | None = None
    is_blocking: bool = False
Zhuohan Li's avatar
Zhuohan Li committed
83
84


85
class ModelCard(OpenAIBaseModel):
    # Description of one model; entries of ModelList.data. Only `id` is
    # required.
    id: str
    object: str = "model"
    # int(time.time()) evaluated when the instance is created.
    created: int = Field(default_factory=lambda: int(time.time()))
    owned_by: str = "vllm"
    root: str | None = None
    parent: str | None = None
    max_model_len: int | None = None
    # A fresh empty list per instance unless permissions are supplied.
    permission: list[ModelPermission] = Field(default_factory=list)
Zhuohan Li's avatar
Zhuohan Li committed
94
95


96
class ModelList(OpenAIBaseModel):
    # Container for ModelCard entries; `object` defaults to "list".
    object: str = "list"
    # A fresh empty list per instance unless cards are supplied.
    data: list[ModelCard] = Field(default_factory=list)
Zhuohan Li's avatar
Zhuohan Li committed
99
100


101
class PromptTokenUsageInfo(OpenAIBaseModel):
    # Optional prompt-token breakdown referenced by
    # UsageInfo.prompt_tokens_details.
    cached_tokens: int | None = None
103
104


105
class UsageInfo(OpenAIBaseModel):
    # Token accounting for a request. All counters default to 0;
    # `completion_tokens` additionally accepts None.
    prompt_tokens: int = 0
    total_tokens: int = 0
    completion_tokens: int | None = 0
    # Optional extra breakdown of the prompt tokens; None when absent.
    prompt_tokens_details: PromptTokenUsageInfo | None = None
Zhuohan Li's avatar
Zhuohan Li committed
110
111


112
113
class RequestResponseMetadata(BaseModel):
    # Plain pydantic BaseModel (not OpenAIBaseModel): pairs a request id
    # with an optional UsageInfo.
    request_id: str
    # None until a final usage record is assigned.
    final_usage_info: UsageInfo | None = None
115
116


117
118
class JsonSchemaResponseFormat(OpenAIBaseModel):
    # JSON-schema description referenced by ResponseFormat.json_schema.
    # Only `name` is required.
    name: str
    description: str | None = None
    # schema is the field in openai but that causes conflicts with pydantic so
    # instead use json_schema with an alias
    json_schema: dict[str, Any] | None = Field(default=None, alias="schema")
    strict: bool | None = None
124
125


126
class LegacyStructuralTag(OpenAIBaseModel):
    # One entry of LegacyStructuralTagResponseFormat.structures: required
    # `begin` and `end` strings plus an optional schema dict.
    begin: str
    # schema is the field, but that causes conflicts with pydantic so
    # instead use structural_tag_schema with an alias
    structural_tag_schema: dict[str, Any] | None = Field(default=None, alias="schema")
    end: str


134
class LegacyStructuralTagResponseFormat(OpenAIBaseModel):
    # Legacy shape of the "structural_tag" response format: a list of
    # LegacyStructuralTag entries plus a list of trigger strings. All three
    # fields are required.
    type: Literal["structural_tag"]
    structures: list[LegacyStructuralTag]
    triggers: list[str]


140
141
142
143
144
145
146
147
148
149
class StructuralTagResponseFormat(OpenAIBaseModel):
    # "structural_tag" response format whose payload lives in `format`.
    # `format` is typed as Any, so this model places no constraint on it.
    type: Literal["structural_tag"]
    format: Any


# Union of the two "structural_tag" response-format models (legacy shape
# listed first).
AnyStructuralTagResponseFormat: TypeAlias = (
    LegacyStructuralTagResponseFormat | StructuralTagResponseFormat
)


150
class ResponseFormat(OpenAIBaseModel):
    # Response format selector; `json_schema` optionally carries a
    # JsonSchemaResponseFormat and defaults to None.

    # type must be "json_schema", "json_object", or "text"
    type: Literal["text", "json_object", "json_schema"]
    json_schema: JsonSchemaResponseFormat | None = None
154
155


156
157
158
# Union of every response-format model declared above: the plain
# ResponseFormat and both "structural_tag" variants.
AnyResponseFormat: TypeAlias = (
    ResponseFormat | StructuralTagResponseFormat | LegacyStructuralTagResponseFormat
)
159
160


161
class StreamOptions(OpenAIBaseModel):
    # Streaming-related flags. Both default to False and also accept None.
    include_usage: bool | None = False
    continuous_usage_stats: bool | None = False
164
165


166
167
class FunctionDefinition(OpenAIBaseModel):
    # Declaration of a function: a required name, an optional description,
    # and an optional dict of parameters.
    name: str
    description: str | None = None
    parameters: dict[str, Any] | None = None
170
171


172
173
# extra="forbid" is a workaround to have kwargs as a field,
# see https://github.com/pydantic/pydantic/issues/3125
class LogitsProcessorConstructor(BaseModel):
    # Request-side description of a logits processor to build:
    # get_logits_processors resolves `qualname` and calls the result with
    # `args` / `kwargs` (each treated as empty when None).
    qualname: str
    args: list[Any] | None = None
    kwargs: dict[str, Any] | None = None

    model_config = ConfigDict(extra="forbid")

181

182
# Each entry is either a qualified-name string or a
# LogitsProcessorConstructor (qualname plus constructor arguments).
LogitsProcessors = list[str | LogitsProcessorConstructor]
183
184


185
def get_logits_processors(
    processors: LogitsProcessors | None, pattern: str | None
) -> list[Any] | None:
    """Resolve the requested logits processors, enforcing the allow-pattern.

    Args:
        processors: Requested processors. Each entry is either a
            qualified-name string or a ``LogitsProcessorConstructor``.
        pattern: Regular expression every qualified name must match. The
            check uses ``re.match``, so it is anchored at the start of the
            name only.

    Returns:
        ``None`` when ``processors`` is ``None`` or empty. Otherwise a list
        with one item per entry, in request order: the resolved object for
        string entries, or the result of calling the resolved object with
        the entry's ``args`` / ``kwargs`` for constructor entries.

    Raises:
        ValueError: If processors are requested but ``pattern`` is unset or
            empty, if a qualified name does not match ``pattern``, or if a
            qualified name cannot be resolved.
    """
    if processors and pattern:
        logits_processors = []
        for processor in processors:
            qualname = processor if isinstance(processor, str) else processor.qualname
            # Check the name against the pattern before resolving it, so a
            # disallowed name is never looked up.
            if not re.match(pattern, qualname):
                raise ValueError(
                    f"Logits processor '{qualname}' is not allowed by this "
                    "server. See --logits-processor-pattern engine argument "
                    "for more information."
                )
            try:
                logits_processor = resolve_obj_by_qualname(qualname)
            except Exception as e:
                # Normalize any resolution failure to ValueError, keeping
                # the original exception as the cause.
                raise ValueError(
                    f"Logits processor '{qualname}' could not be resolved: {e}"
                ) from e
            if isinstance(processor, LogitsProcessorConstructor):
                # Constructor entries are called; missing args/kwargs are
                # treated as empty.
                logits_processor = logits_processor(
                    *processor.args or [], **processor.kwargs or {}
                )
            logits_processors.append(logits_processor)
        return logits_processors
    elif processors:
        # Processors were requested but no pattern is configured.
        raise ValueError(
            "The `logits_processors` argument is not supported by this "
            "server. See --logits-processor-pattern engine argument "
            "for more information."
        )
    return None


219
class FunctionCall(OpenAIBaseModel):
    # Function name plus its arguments string; used as ToolCall.function.

    # Internal field to preserve native tool call ID from tool parser.
    # Excluded from serialization to maintain OpenAI API compatibility
    # (function object should only contain 'name' and 'arguments').
    id: str | None = Field(default=None, exclude=True)
    name: str
    arguments: str


class ToolCall(OpenAIBaseModel):
    # A complete tool call wrapping one FunctionCall.

    # Generated by make_tool_call_id() when the caller supplies no id.
    id: str = Field(default_factory=make_tool_call_id)
    # Only "function" is accepted; it is also the default.
    type: Literal["function"] = "function"
    function: FunctionCall


234
class DeltaFunctionCall(BaseModel):
    # Partial function call used as DeltaToolCall.function; both fields are
    # optional. Derives from plain BaseModel, not OpenAIBaseModel.
    name: str | None = None
    arguments: str | None = None
237
238
239
240


# a tool call delta where everything is optional
class DeltaToolCall(OpenAIBaseModel):
    # Every field except `index` defaults to None.
    id: str | None = None
    type: Literal["function"] | None = None
    # The only required field.
    index: int
    function: DeltaFunctionCall | None = None
245
246
247
248
249
250
251


class ExtractedToolCallInformation(BaseModel):
    # Result record for tool-call extraction: a flag, the extracted calls,
    # and any accompanying text content.

    # indicate if tools were called
    tools_called: bool

    # extracted tool calls
    tool_calls: list[ToolCall]

    # content - per the OpenAI spec, content and tool calls are rarely
    # returned together, but some models do this intentionally
    content: str | None = None
257
258


259
class DeltaMessage(OpenAIBaseModel):
    # Incremental message piece. Every field is optional; `tool_calls`
    # defaults to a fresh empty list per instance.
    role: str | None = None
    content: str | None = None
    reasoning: str | None = None
    tool_calls: list[DeltaToolCall] = Field(default_factory=list)
264
265


266
267
268
269
270
271
272
273
class GenerationError(Exception):
    """Error raised when finish_reason signals an internal server error.

    Each instance carries ``status_code`` set to HTTP 500
    (``HTTPStatus.INTERNAL_SERVER_ERROR``).
    """

    def __init__(self, message: str = "Internal server error"):
        # Attach the HTTP status first; it does not depend on the message.
        self.status_code = HTTPStatus.INTERNAL_SERVER_ERROR
        Exception.__init__(self, message)


274
275
276
####### Tokens IN <> Tokens OUT #######
class GenerateRequest(BaseModel):
    # Request model for the token-ids-in / token-ids-out path: the prompt
    # arrives already tokenized in `token_ids`. Only `token_ids` and
    # `sampling_params` are required.
    request_id: str = Field(
        default_factory=random_uuid,
        description=(
            "The request_id related to this request. If the caller does "
            "not set it, a random_uuid will be generated. This id is used "
            "through out the inference process and return in response."
        ),
    )
    token_ids: list[int]
    """The token ids to generate text from."""

    # features: MultiModalFeatureSpec
    # TODO (NickLucche): implement once Renderer work is completed
    features: str | None = None
    """The processed MM inputs for the model."""

    sampling_params: SamplingParams
    """The sampling parameters for the model."""

    model: str | None = None

    stream: bool | None = False
    stream_options: StreamOptions | None = None
    cache_salt: str | None = Field(
        default=None,
        description=(
            "If specified, the prefix cache will be salted with the provided "
            "string to prevent an attacker to guess prompts in multi-user "
            "environments. The salt should be random, protected from "
            "access by 3rd parties, and long enough to be "
            "unpredictable (e.g., 43 characters base64-encoded, corresponding "
            "to 256 bit)."
        ),
    )
    priority: int = Field(
        default=0,
        description=(
            "The priority of the request (lower means earlier handling; "
            "default: 0). Any priority other than 0 will raise an error "
            "if the served model does not use priority scheduling."
        ),
    )
    kv_transfer_params: dict[str, Any] | None = Field(
        default=None,
        description="KVTransfer parameters used for disaggregated serving.",
    )