Unverified commit 57a314d1, authored by Andreas Karatzas, committed by GitHub
Browse files

[CI][Bugfix] Fix 500 errors from priority overflow and TemplateError...


[CI][Bugfix] Fix 500 errors from priority overflow and TemplateError subclasses in schema fuzz tests (#37127)
Signed-off-by: Andreas Karatzas <akaratza@amd.com>
parent d4c57863
......@@ -45,6 +45,8 @@ pystemmer==3.0.0
# via mteb
# Multi-modal processing
av==16.1.0
# required for audio_in_video tests
blobfile==3.0.0
# Multi-Modal Models Test
decord==0.6.0
......
......@@ -7,7 +7,6 @@ import json
import time
from typing import Annotated, Any, ClassVar, Literal
import torch
from openai.types.chat.chat_completion_audio import (
ChatCompletionAudio as OpenAIChatCompletionAudio,
)
......@@ -48,7 +47,8 @@ from vllm.utils import random_uuid
logger = init_logger(__name__)
_LONG_INFO = torch.iinfo(torch.long)
_INT64_MIN = -(2**63)
_INT64_MAX = 2**63 - 1
class ChatMessage(OpenAIBaseModel):
......@@ -165,7 +165,7 @@ class ChatCompletionRequest(OpenAIBaseModel):
n: int | None = 1
presence_penalty: float | None = 0.0
response_format: AnyResponseFormat | None = None
seed: int | None = Field(None, ge=_LONG_INFO.min, le=_LONG_INFO.max)
seed: int | None = Field(None, ge=_INT64_MIN, le=_INT64_MAX)
stop: str | list[str] | None = []
stream: bool | None = False
stream_options: StreamOptions | None = None
......@@ -198,9 +198,7 @@ class ChatCompletionRequest(OpenAIBaseModel):
min_tokens: int = 0
skip_special_tokens: bool = True
spaces_between_special_tokens: bool = True
truncate_prompt_tokens: Annotated[int, Field(ge=-1, le=_LONG_INFO.max)] | None = (
None
)
truncate_prompt_tokens: Annotated[int, Field(ge=-1, le=_INT64_MAX)] | None = None
prompt_logprobs: int | None = None
allowed_token_ids: list[int] | None = None
bad_words: list[str] = Field(default_factory=list)
......@@ -285,6 +283,8 @@ class ChatCompletionRequest(OpenAIBaseModel):
)
priority: int = Field(
default=0,
ge=_INT64_MIN,
le=_INT64_MAX,
description=(
"The priority of the request (lower means earlier handling; "
"default: 0). Any priority other than 0 will raise an error "
......
......@@ -6,6 +6,7 @@ import json
import time
from collections.abc import AsyncGenerator, AsyncIterator
from collections.abc import Sequence as GenericSequence
from http import HTTPStatus
from typing import TYPE_CHECKING, Any, Final
import partial_json_parser
......@@ -1289,7 +1290,12 @@ class OpenAIServingChat(OpenAIServing):
except asyncio.CancelledError:
return self.create_error_response("Client disconnected")
assert final_res is not None
if final_res is None:
return self.create_error_response(
"No output received from the engine.",
err_type="InternalServerError",
status_code=HTTPStatus.INTERNAL_SERVER_ERROR,
)
choices: list[ChatCompletionResponseChoice] = []
if self.tool_call_id_type == "kimi_k2":
......
......@@ -7,7 +7,6 @@ import json
import time
from typing import Annotated, Any, Literal
import torch
from pydantic import Field, model_validator
from vllm.config import ModelConfig
......@@ -36,7 +35,8 @@ from vllm.utils import random_uuid
logger = init_logger(__name__)
_LONG_INFO = torch.iinfo(torch.long)
_INT64_MIN = -(2**63)
_INT64_MAX = 2**63 - 1
class CompletionRequest(OpenAIBaseModel):
......@@ -57,7 +57,7 @@ class CompletionRequest(OpenAIBaseModel):
max_tokens: int | None = 16
n: int = 1
presence_penalty: float | None = 0.0
seed: int | None = Field(None, ge=_LONG_INFO.min, le=_LONG_INFO.max)
seed: int | None = Field(None, ge=_INT64_MIN, le=_INT64_MAX)
stop: str | list[str] | None = []
stream: bool | None = False
stream_options: StreamOptions | None = None
......@@ -78,9 +78,7 @@ class CompletionRequest(OpenAIBaseModel):
min_tokens: int = 0
skip_special_tokens: bool = True
spaces_between_special_tokens: bool = True
truncate_prompt_tokens: Annotated[int, Field(ge=-1, le=_LONG_INFO.max)] | None = (
None
)
truncate_prompt_tokens: Annotated[int, Field(ge=-1, le=_INT64_MAX)] | None = None
allowed_token_ids: list[int] | None = None
prompt_logprobs: int | None = None
# --8<-- [end:completion-sampling-params]
......@@ -108,6 +106,8 @@ class CompletionRequest(OpenAIBaseModel):
)
priority: int = Field(
default=0,
ge=_INT64_MIN,
le=_INT64_MAX,
description=(
"The priority of the request (lower means earlier handling; "
"default: 0). Any priority other than 0 will raise an error "
......
......@@ -6,7 +6,6 @@
import time
from typing import Any, Literal, TypeAlias
import torch
from openai.types.responses import (
ResponseCodeInterpreterCallCodeDeltaEvent,
ResponseCodeInterpreterCallCodeDoneEvent,
......@@ -78,7 +77,8 @@ from vllm.utils import random_uuid
logger = init_logger(__name__)
_LONG_INFO = torch.iinfo(torch.long)
_INT64_MIN = -(2**63)
_INT64_MAX = 2**63 - 1
class InputTokensDetails(OpenAIBaseModel):
......@@ -210,6 +210,8 @@ class ResponsesRequest(OpenAIBaseModel):
)
priority: int = Field(
default=0,
ge=_INT64_MIN,
le=_INT64_MAX,
description=(
"The priority of the request (lower means earlier handling; "
"default: 0). Any priority other than 0 will raise an error "
......@@ -246,7 +248,7 @@ class ResponsesRequest(OpenAIBaseModel):
)
repetition_penalty: float | None = None
seed: int | None = Field(None, ge=_LONG_INFO.min, le=_LONG_INFO.max)
seed: int | None = Field(None, ge=_INT64_MIN, le=_INT64_MAX)
stop: str | list[str] | None = []
ignore_eos: bool = False
vllm_xargs: dict[str, str | int | float | list[str | int | float]] | None = Field(
......
......@@ -34,6 +34,8 @@ class PoolingBasicRequestMixin(OpenAIBaseModel):
)
priority: int = Field(
default=0,
ge=-(2**63),
le=2**63 - 1,
description=(
"The priority of the request (lower means earlier handling; "
"default: 0). Any priority other than 0 will raise an error "
......
......@@ -93,6 +93,8 @@ class GenerateRequest(BaseModel):
)
priority: int = Field(
default=0,
ge=-(2**63),
le=2**63 - 1,
description=(
"The priority of the request (lower means earlier handling; "
"default: 0). Any priority other than 0 will raise an error "
......
......@@ -331,8 +331,8 @@ def create_error_response(
err_type = "InternalServerError"
status_code = exc.status_code
param = None
elif exc.__class__.__name__ == "TemplateError":
# jinja2.TemplateError (avoid importing jinja2)
elif any(cls.__name__ == "TemplateError" for cls in type(exc).__mro__):
# jinja2.TemplateError and its subclasses (avoid importing jinja2)
err_type = "BadRequestError"
status_code = HTTPStatus.BAD_REQUEST
param = None
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment