Unverified commit e2148dc5, authored by Kourosh Hakhamaneshi, committed by GitHub
Browse files

[Bugfix] Add check_health to v1 async client. (#19821)


Signed-off-by: Kourosh Hakhamaneshi <kourosh@anyscale.com>
parent b1098b40
...@@ -369,3 +369,32 @@ async def test_dp_rank_argument(monkeypatch: pytest.MonkeyPatch): ...@@ -369,3 +369,32 @@ async def test_dp_rank_argument(monkeypatch: pytest.MonkeyPatch):
sampling_params=sampling_params, sampling_params=sampling_params,
data_parallel_rank=1): data_parallel_rank=1):
pass pass
@pytest.mark.asyncio
async def test_check_health(monkeypatch: pytest.MonkeyPatch):
    """Exercise ``check_health`` on a healthy and a simulated-dead engine.

    The call must return without raising while the engine is healthy,
    raise ``EngineDeadError`` while the engine's ``errored`` property is
    patched to report ``True``, and return without raising again once the
    patch has been removed.
    """
    from unittest.mock import patch

    from vllm.v1.engine.exceptions import EngineDeadError

    with monkeypatch.context() as m, ExitStack() as after:
        m.setenv("VLLM_USE_V1", "1")

        engine = AsyncLLM.from_engine_args(TEXT_ENGINE_ARGS)
        # Make sure the engine is shut down when the test leaves the stack.
        after.callback(engine.shutdown)

        # Step 1: a healthy engine must pass the health check silently.
        await engine.check_health()

        # Step 2: swap the class-level ``errored`` attribute for a property
        # that always reports True, so the engine looks dead.
        always_errored = property(lambda self: True)
        with patch.object(type(engine), 'errored', new=always_errored):
            with pytest.raises(EngineDeadError):
                await engine.check_health()

        # Step 3: with the patch undone, the health check passes again.
        await engine.check_health()
...@@ -552,6 +552,8 @@ class AsyncLLM(EngineClient): ...@@ -552,6 +552,8 @@ class AsyncLLM(EngineClient):
async def check_health(self) -> None:
    """Raise ``self.dead_error`` if the engine reports an error.

    Emits a debug log entry on every call. Returns ``None`` when
    ``self.errored`` is falsy; otherwise raises ``self.dead_error``.
    """
    logger.debug("Called check_health.")
    if not self.errored:
        return
    raise self.dead_error
async def start_profile(self) -> None: async def start_profile(self) -> None:
await self.engine_core.profile_async(True) await self.engine_core.profile_async(True)
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment