Unverified commit 57a314d1, authored by Andreas Karatzas, committed by GitHub
Browse files

[CI][Bugfix] Fix 500 errors from priority overflow and TemplateError...


[CI][Bugfix] Fix 500 errors from priority overflow and TemplateError subclasses in schema fuzz tests (#37127)
Signed-off-by: Andreas Karatzas <akaratza@amd.com>
parent d4c57863
...@@ -45,6 +45,8 @@ pystemmer==3.0.0 ...@@ -45,6 +45,8 @@ pystemmer==3.0.0
# via mteb # via mteb
# Multi-modal processing # Multi-modal processing
av==16.1.0
# required for audio_in_video tests
blobfile==3.0.0 blobfile==3.0.0
# Multi-Modal Models Test # Multi-Modal Models Test
decord==0.6.0 decord==0.6.0
......
...@@ -7,7 +7,6 @@ import json ...@@ -7,7 +7,6 @@ import json
import time import time
from typing import Annotated, Any, ClassVar, Literal from typing import Annotated, Any, ClassVar, Literal
import torch
from openai.types.chat.chat_completion_audio import ( from openai.types.chat.chat_completion_audio import (
ChatCompletionAudio as OpenAIChatCompletionAudio, ChatCompletionAudio as OpenAIChatCompletionAudio,
) )
...@@ -48,7 +47,8 @@ from vllm.utils import random_uuid ...@@ -48,7 +47,8 @@ from vllm.utils import random_uuid
logger = init_logger(__name__) logger = init_logger(__name__)
_LONG_INFO = torch.iinfo(torch.long) _INT64_MIN = -(2**63)
_INT64_MAX = 2**63 - 1
class ChatMessage(OpenAIBaseModel): class ChatMessage(OpenAIBaseModel):
...@@ -165,7 +165,7 @@ class ChatCompletionRequest(OpenAIBaseModel): ...@@ -165,7 +165,7 @@ class ChatCompletionRequest(OpenAIBaseModel):
n: int | None = 1 n: int | None = 1
presence_penalty: float | None = 0.0 presence_penalty: float | None = 0.0
response_format: AnyResponseFormat | None = None response_format: AnyResponseFormat | None = None
seed: int | None = Field(None, ge=_LONG_INFO.min, le=_LONG_INFO.max) seed: int | None = Field(None, ge=_INT64_MIN, le=_INT64_MAX)
stop: str | list[str] | None = [] stop: str | list[str] | None = []
stream: bool | None = False stream: bool | None = False
stream_options: StreamOptions | None = None stream_options: StreamOptions | None = None
...@@ -198,9 +198,7 @@ class ChatCompletionRequest(OpenAIBaseModel): ...@@ -198,9 +198,7 @@ class ChatCompletionRequest(OpenAIBaseModel):
min_tokens: int = 0 min_tokens: int = 0
skip_special_tokens: bool = True skip_special_tokens: bool = True
spaces_between_special_tokens: bool = True spaces_between_special_tokens: bool = True
truncate_prompt_tokens: Annotated[int, Field(ge=-1, le=_LONG_INFO.max)] | None = ( truncate_prompt_tokens: Annotated[int, Field(ge=-1, le=_INT64_MAX)] | None = None
None
)
prompt_logprobs: int | None = None prompt_logprobs: int | None = None
allowed_token_ids: list[int] | None = None allowed_token_ids: list[int] | None = None
bad_words: list[str] = Field(default_factory=list) bad_words: list[str] = Field(default_factory=list)
...@@ -285,6 +283,8 @@ class ChatCompletionRequest(OpenAIBaseModel): ...@@ -285,6 +283,8 @@ class ChatCompletionRequest(OpenAIBaseModel):
) )
priority: int = Field( priority: int = Field(
default=0, default=0,
ge=_INT64_MIN,
le=_INT64_MAX,
description=( description=(
"The priority of the request (lower means earlier handling; " "The priority of the request (lower means earlier handling; "
"default: 0). Any priority other than 0 will raise an error " "default: 0). Any priority other than 0 will raise an error "
......
...@@ -6,6 +6,7 @@ import json ...@@ -6,6 +6,7 @@ import json
import time import time
from collections.abc import AsyncGenerator, AsyncIterator from collections.abc import AsyncGenerator, AsyncIterator
from collections.abc import Sequence as GenericSequence from collections.abc import Sequence as GenericSequence
from http import HTTPStatus
from typing import TYPE_CHECKING, Any, Final from typing import TYPE_CHECKING, Any, Final
import partial_json_parser import partial_json_parser
...@@ -1289,7 +1290,12 @@ class OpenAIServingChat(OpenAIServing): ...@@ -1289,7 +1290,12 @@ class OpenAIServingChat(OpenAIServing):
except asyncio.CancelledError: except asyncio.CancelledError:
return self.create_error_response("Client disconnected") return self.create_error_response("Client disconnected")
assert final_res is not None if final_res is None:
return self.create_error_response(
"No output received from the engine.",
err_type="InternalServerError",
status_code=HTTPStatus.INTERNAL_SERVER_ERROR,
)
choices: list[ChatCompletionResponseChoice] = [] choices: list[ChatCompletionResponseChoice] = []
if self.tool_call_id_type == "kimi_k2": if self.tool_call_id_type == "kimi_k2":
......
...@@ -7,7 +7,6 @@ import json ...@@ -7,7 +7,6 @@ import json
import time import time
from typing import Annotated, Any, Literal from typing import Annotated, Any, Literal
import torch
from pydantic import Field, model_validator from pydantic import Field, model_validator
from vllm.config import ModelConfig from vllm.config import ModelConfig
...@@ -36,7 +35,8 @@ from vllm.utils import random_uuid ...@@ -36,7 +35,8 @@ from vllm.utils import random_uuid
logger = init_logger(__name__) logger = init_logger(__name__)
_LONG_INFO = torch.iinfo(torch.long) _INT64_MIN = -(2**63)
_INT64_MAX = 2**63 - 1
class CompletionRequest(OpenAIBaseModel): class CompletionRequest(OpenAIBaseModel):
...@@ -57,7 +57,7 @@ class CompletionRequest(OpenAIBaseModel): ...@@ -57,7 +57,7 @@ class CompletionRequest(OpenAIBaseModel):
max_tokens: int | None = 16 max_tokens: int | None = 16
n: int = 1 n: int = 1
presence_penalty: float | None = 0.0 presence_penalty: float | None = 0.0
seed: int | None = Field(None, ge=_LONG_INFO.min, le=_LONG_INFO.max) seed: int | None = Field(None, ge=_INT64_MIN, le=_INT64_MAX)
stop: str | list[str] | None = [] stop: str | list[str] | None = []
stream: bool | None = False stream: bool | None = False
stream_options: StreamOptions | None = None stream_options: StreamOptions | None = None
...@@ -78,9 +78,7 @@ class CompletionRequest(OpenAIBaseModel): ...@@ -78,9 +78,7 @@ class CompletionRequest(OpenAIBaseModel):
min_tokens: int = 0 min_tokens: int = 0
skip_special_tokens: bool = True skip_special_tokens: bool = True
spaces_between_special_tokens: bool = True spaces_between_special_tokens: bool = True
truncate_prompt_tokens: Annotated[int, Field(ge=-1, le=_LONG_INFO.max)] | None = ( truncate_prompt_tokens: Annotated[int, Field(ge=-1, le=_INT64_MAX)] | None = None
None
)
allowed_token_ids: list[int] | None = None allowed_token_ids: list[int] | None = None
prompt_logprobs: int | None = None prompt_logprobs: int | None = None
# --8<-- [end:completion-sampling-params] # --8<-- [end:completion-sampling-params]
...@@ -108,6 +106,8 @@ class CompletionRequest(OpenAIBaseModel): ...@@ -108,6 +106,8 @@ class CompletionRequest(OpenAIBaseModel):
) )
priority: int = Field( priority: int = Field(
default=0, default=0,
ge=_INT64_MIN,
le=_INT64_MAX,
description=( description=(
"The priority of the request (lower means earlier handling; " "The priority of the request (lower means earlier handling; "
"default: 0). Any priority other than 0 will raise an error " "default: 0). Any priority other than 0 will raise an error "
......
...@@ -6,7 +6,6 @@ ...@@ -6,7 +6,6 @@
import time import time
from typing import Any, Literal, TypeAlias from typing import Any, Literal, TypeAlias
import torch
from openai.types.responses import ( from openai.types.responses import (
ResponseCodeInterpreterCallCodeDeltaEvent, ResponseCodeInterpreterCallCodeDeltaEvent,
ResponseCodeInterpreterCallCodeDoneEvent, ResponseCodeInterpreterCallCodeDoneEvent,
...@@ -78,7 +77,8 @@ from vllm.utils import random_uuid ...@@ -78,7 +77,8 @@ from vllm.utils import random_uuid
logger = init_logger(__name__) logger = init_logger(__name__)
_LONG_INFO = torch.iinfo(torch.long) _INT64_MIN = -(2**63)
_INT64_MAX = 2**63 - 1
class InputTokensDetails(OpenAIBaseModel): class InputTokensDetails(OpenAIBaseModel):
...@@ -210,6 +210,8 @@ class ResponsesRequest(OpenAIBaseModel): ...@@ -210,6 +210,8 @@ class ResponsesRequest(OpenAIBaseModel):
) )
priority: int = Field( priority: int = Field(
default=0, default=0,
ge=_INT64_MIN,
le=_INT64_MAX,
description=( description=(
"The priority of the request (lower means earlier handling; " "The priority of the request (lower means earlier handling; "
"default: 0). Any priority other than 0 will raise an error " "default: 0). Any priority other than 0 will raise an error "
...@@ -246,7 +248,7 @@ class ResponsesRequest(OpenAIBaseModel): ...@@ -246,7 +248,7 @@ class ResponsesRequest(OpenAIBaseModel):
) )
repetition_penalty: float | None = None repetition_penalty: float | None = None
seed: int | None = Field(None, ge=_LONG_INFO.min, le=_LONG_INFO.max) seed: int | None = Field(None, ge=_INT64_MIN, le=_INT64_MAX)
stop: str | list[str] | None = [] stop: str | list[str] | None = []
ignore_eos: bool = False ignore_eos: bool = False
vllm_xargs: dict[str, str | int | float | list[str | int | float]] | None = Field( vllm_xargs: dict[str, str | int | float | list[str | int | float]] | None = Field(
......
...@@ -34,6 +34,8 @@ class PoolingBasicRequestMixin(OpenAIBaseModel): ...@@ -34,6 +34,8 @@ class PoolingBasicRequestMixin(OpenAIBaseModel):
) )
priority: int = Field( priority: int = Field(
default=0, default=0,
ge=-(2**63),
le=2**63 - 1,
description=( description=(
"The priority of the request (lower means earlier handling; " "The priority of the request (lower means earlier handling; "
"default: 0). Any priority other than 0 will raise an error " "default: 0). Any priority other than 0 will raise an error "
......
...@@ -93,6 +93,8 @@ class GenerateRequest(BaseModel): ...@@ -93,6 +93,8 @@ class GenerateRequest(BaseModel):
) )
priority: int = Field( priority: int = Field(
default=0, default=0,
ge=-(2**63),
le=2**63 - 1,
description=( description=(
"The priority of the request (lower means earlier handling; " "The priority of the request (lower means earlier handling; "
"default: 0). Any priority other than 0 will raise an error " "default: 0). Any priority other than 0 will raise an error "
......
...@@ -331,8 +331,8 @@ def create_error_response( ...@@ -331,8 +331,8 @@ def create_error_response(
err_type = "InternalServerError" err_type = "InternalServerError"
status_code = exc.status_code status_code = exc.status_code
param = None param = None
elif exc.__class__.__name__ == "TemplateError": elif any(cls.__name__ == "TemplateError" for cls in type(exc).__mro__):
# jinja2.TemplateError (avoid importing jinja2) # jinja2.TemplateError and its subclasses (avoid importing jinja2)
err_type = "BadRequestError" err_type = "BadRequestError"
status_code = HTTPStatus.BAD_REQUEST status_code = HTTPStatus.BAD_REQUEST
param = None param = None
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment