Unverified Commit 67244c86 authored by dongbo910220's avatar dongbo910220 Committed by GitHub
Browse files

feat(api): Return 503 on /health when engine is dead (#24897)


Signed-off-by: dongbo910220 <1275604947@qq.com>
Co-authored-by: Claude <noreply@anthropic.com>
parent 072d7e53
......@@ -103,6 +103,7 @@ from vllm.transformers_utils.tokenizer import MistralTokenizer
from vllm.usage.usage_lib import UsageContext
from vllm.utils import (Device, FlexibleArgumentParser, decorate_logs,
is_valid_ipv6_address, set_ulimit)
from vllm.v1.engine.exceptions import EngineDeadError
from vllm.v1.metrics.prometheus import get_prometheus_registry
from vllm.version import __version__ as VLLM_VERSION
......@@ -351,8 +352,11 @@ def engine_client(request: Request) -> EngineClient:
@router.get("/health", response_class=Response)
async def health(raw_request: Request) -> Response:
    """Report engine health as an HTTP status code.

    Awaits ``check_health()`` on the engine client resolved from
    ``raw_request``. Responds with 200 when that call completes without
    raising ``EngineDeadError`` and with 503 when ``EngineDeadError`` is
    raised. Any other exception is not handled here and propagates to the
    caller.
    """
    status_code = 200
    try:
        # The client lookup stays inside the try so that an EngineDeadError
        # from either the lookup or the health probe maps to 503.
        await engine_client(raw_request).check_health()
    except EngineDeadError:
        status_code = 503
    return Response(status_code=status_code)
@router.get("/load")
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment