Commit 6682c231 (unverified), authored by Chauncey, committed by GitHub
Browse files

[Bugfix] Add error handling for FINISHED_ERROR in OpenAIServing (#37148)


Signed-off-by: chaunceyjiang <chaunceyjiang@gmail.com>
parent 5ae685c1
...@@ -29,11 +29,13 @@ from vllm.entrypoints.chat_utils import load_chat_template ...@@ -29,11 +29,13 @@ from vllm.entrypoints.chat_utils import load_chat_template
from vllm.entrypoints.launcher import serve_http from vllm.entrypoints.launcher import serve_http
from vllm.entrypoints.logger import RequestLogger from vllm.entrypoints.logger import RequestLogger
from vllm.entrypoints.openai.cli_args import make_arg_parser, validate_parsed_serve_args from vllm.entrypoints.openai.cli_args import make_arg_parser, validate_parsed_serve_args
from vllm.entrypoints.openai.engine.protocol import GenerationError
from vllm.entrypoints.openai.models.protocol import BaseModelPath from vllm.entrypoints.openai.models.protocol import BaseModelPath
from vllm.entrypoints.openai.models.serving import OpenAIServingModels from vllm.entrypoints.openai.models.serving import OpenAIServingModels
from vllm.entrypoints.openai.server_utils import ( from vllm.entrypoints.openai.server_utils import (
engine_error_handler, engine_error_handler,
exception_handler, exception_handler,
generation_error_handler,
get_uvicorn_log_config, get_uvicorn_log_config,
http_exception_handler, http_exception_handler,
lifespan, lifespan,
...@@ -263,6 +265,7 @@ def build_app( ...@@ -263,6 +265,7 @@ def build_app(
app.exception_handler(RequestValidationError)(validation_exception_handler) app.exception_handler(RequestValidationError)(validation_exception_handler)
app.exception_handler(EngineGenerateError)(engine_error_handler) app.exception_handler(EngineGenerateError)(engine_error_handler)
app.exception_handler(EngineDeadError)(engine_error_handler) app.exception_handler(EngineDeadError)(engine_error_handler)
app.exception_handler(GenerationError)(generation_error_handler)
app.exception_handler(Exception)(exception_handler) app.exception_handler(Exception)(exception_handler)
# Ensure --api-key option from CLI takes precedence over VLLM_API_KEY # Ensure --api-key option from CLI takes precedence over VLLM_API_KEY
......
...@@ -21,7 +21,11 @@ from starlette.types import ASGIApp, Message, Receive, Scope, Send ...@@ -21,7 +21,11 @@ from starlette.types import ASGIApp, Message, Receive, Scope, Send
from vllm import envs from vllm import envs
from vllm.engine.protocol import EngineClient from vllm.engine.protocol import EngineClient
from vllm.entrypoints.launcher import terminate_if_errored from vllm.entrypoints.launcher import terminate_if_errored
from vllm.entrypoints.openai.engine.protocol import ErrorInfo, ErrorResponse from vllm.entrypoints.openai.engine.protocol import (
ErrorInfo,
ErrorResponse,
GenerationError,
)
from vllm.entrypoints.utils import create_error_response, sanitize_message from vllm.entrypoints.utils import create_error_response, sanitize_message
from vllm.exceptions import VLLMValidationError from vllm.exceptions import VLLMValidationError
from vllm.logger import init_logger from vllm.logger import init_logger
...@@ -354,6 +358,17 @@ async def engine_error_handler( ...@@ -354,6 +358,17 @@ async def engine_error_handler(
return JSONResponse(err.model_dump(), status_code=err.error.code) return JSONResponse(err.model_dump(), status_code=err.error.code)
async def generation_error_handler(req: Request, exc: GenerationError):
    """Turn a GenerationError into a JSON error response, emitting no logs.

    GenerationError is treated as a known, expected failure (for example a
    KV cache load failure), so this handler deliberately performs no logging
    and server logs are not polluted with stack traces.

    The response body is the serialized error object built by
    ``create_error_response``; the HTTP status is the code carried on that
    error object (described by the original author as a 500 response).
    The ``req`` argument is accepted to satisfy the handler signature and is
    not read.
    """
    error_response = create_error_response(exc)
    # Serialize first, then read the status code, mirroring the original
    # evaluation order.
    payload = error_response.model_dump()
    status = error_response.error.code
    return JSONResponse(payload, status_code=status)
async def exception_handler(req: Request, exc: Exception): async def exception_handler(req: Request, exc: Exception):
if req.app.state.args.log_error_stack: if req.app.state.args.log_error_stack:
logger.exception( logger.exception(
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment