Unverified commit dfd3aa99, authored by hhzhang16, committed by GitHub
Browse files

feat: address MR comments from TRTLLM/GMS integration (#8255)


Signed-off-by: Hannah Zhang <hannahz@nvidia.com>
parent d07288c5
...@@ -107,13 +107,8 @@ class TRTLLMEngineQuiesceController: ...@@ -107,13 +107,8 @@ class TRTLLMEngineQuiesceController:
"TRT-LLM does not expose _collective_rpc; skipping %s", method "TRT-LLM does not expose _collective_rpc; skipping %s", method
) )
return return
try:
rpc(method, args=(rpc_tags,), kwargs={}, non_block=False) rpc(method, args=(rpc_tags,), kwargs={}, non_block=False)
except Exception:
if method != "wakeup":
raise
# Some TRT-LLM versions use "wake_up" instead of "wakeup"
rpc("wake_up", args=(rpc_tags,), kwargs={}, non_block=False)
@staticmethod @staticmethod
def _release_gms_weights() -> None: def _release_gms_weights() -> None:
......
...@@ -53,10 +53,17 @@ def _make_handler() -> _ConcreteHandler: ...@@ -53,10 +53,17 @@ def _make_handler() -> _ConcreteHandler:
handler._no_inflight_requests = asyncio.Event() handler._no_inflight_requests = asyncio.Event()
handler._no_inflight_requests.set() handler._no_inflight_requests.set()
handler._reject_new_requests = False handler._reject_new_requests = False
# Mock the quiesce controller that release/resume delegate to # Mock the quiesce controller that release/resume delegate to.
# quiesce side_effect mirrors the real implementation;
# tests don't need to manually update state after a release call.
handler._quiesce_controller = MagicMock() handler._quiesce_controller = MagicMock()
handler._quiesce_controller.is_quiesced = False handler._quiesce_controller.is_quiesced = False
handler._quiesce_controller.quiesce = AsyncMock(return_value=True)
async def _quiesce(tags=None):
handler._quiesce_controller.is_quiesced = True
return True
handler._quiesce_controller.quiesce = AsyncMock(side_effect=_quiesce)
handler._quiesce_controller.resume = AsyncMock(return_value=True) handler._quiesce_controller.resume = AsyncMock(return_value=True)
handler._quiesce_controller.mark_resumed = MagicMock() handler._quiesce_controller.mark_resumed = MagicMock()
return handler return handler
...@@ -165,9 +172,6 @@ async def test_release_and_resume_round_trip(): ...@@ -165,9 +172,6 @@ async def test_release_and_resume_round_trip():
release = await handler.release_memory_occupation({}) release = await handler.release_memory_occupation({})
assert release["status"] == "ok" assert release["status"] == "ok"
# After release, controller reports quiesced
handler._quiesce_controller.is_quiesced = True
resume = await handler.resume_memory_occupation({}) resume = await handler.resume_memory_occupation({})
assert resume["status"] == "ok" assert resume["status"] == "ok"
handler._quiesce_controller.resume.assert_awaited_once() handler._quiesce_controller.resume.assert_awaited_once()
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment