Unverified commit 162065fe, authored by Waël Boukhobza and committed by GitHub
Browse files

fix: vLLM engine graceful shutdown to prevent VRAM leaks on crash (#2898)


Signed-off-by: Wael Boukhobza <wawa_wael@live.fr>
parent ede0da16
...@@ -4,6 +4,7 @@ ...@@ -4,6 +4,7 @@
import asyncio import asyncio
import logging import logging
import os import os
import signal
import traceback import traceback
from vllm.v1.engine.async_llm import AsyncLLM from vllm.v1.engine.async_llm import AsyncLLM
...@@ -16,6 +17,7 @@ configure_dynamo_logging ...@@ -16,6 +17,7 @@ configure_dynamo_logging
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
HEALTH_CHECK_INTERVAL = 2 HEALTH_CHECK_INTERVAL = 2
ENGINE_SHUTDOWN_TIMEOUT = 30 # seconds
class VllmEngineMonitor: class VllmEngineMonitor:
...@@ -44,6 +46,25 @@ class VllmEngineMonitor: ...@@ -44,6 +46,25 @@ class VllmEngineMonitor:
def __del__(self): def __del__(self):
self._monitor_task.cancel() self._monitor_task.cancel()
def _shutdown_engine(self):
    """
    Shut down the vLLM engine on crash scenarios to free resources.

    Calls ``self.engine_client.shutdown()`` and bounds it with a
    ``SIGALRM``-based timeout of ``ENGINE_SHUTDOWN_TIMEOUT`` seconds when
    the timeout can be armed. This method never raises for shutdown
    failures: any exception from the engine shutdown (including the
    timeout) is logged as a warning so the caller can continue with its
    own shutdown sequence.

    If the alarm cannot be armed (``signal.signal`` raises ``ValueError``
    when not called from the main thread, and ``signal.SIGALRM`` does not
    exist on every platform), the engine shutdown is still attempted, but
    without timeout protection.
    """

    def timeout_handler(signum, frame):
        # Raised inside whatever the main thread is executing when the
        # alarm fires, i.e. inside the blocking shutdown() call below.
        raise TimeoutError("Engine shutdown timed out")

    handler_installed = False
    previous_handler = None
    try:
        previous_handler = signal.signal(signal.SIGALRM, timeout_handler)
        handler_installed = True
        signal.alarm(ENGINE_SHUTDOWN_TIMEOUT)
    except (AttributeError, ValueError) as e:
        # Do not let a failure to arm the timeout abort the crash path;
        # fall back to an unbounded best-effort shutdown instead.
        logger.warning(
            f"SIGALRM timeout protection unavailable for engine shutdown: {e}"
        )

    try:
        self.engine_client.shutdown()
    except Exception as e:
        logger.warning(f"vLLM engine shutdown failed: {e}")
    finally:
        if handler_installed:
            # Cancel any pending alarm, then put back whatever handler was
            # registered before so this method leaves no global state behind.
            signal.alarm(0)
            # signal.signal() returns None when the previous handler was not
            # installed from Python; that value cannot be re-installed.
            if previous_handler is not None:
                signal.signal(signal.SIGALRM, previous_handler)
async def _check_engine_health(self): async def _check_engine_health(self):
while True: while True:
try: try:
...@@ -53,6 +74,7 @@ class VllmEngineMonitor: ...@@ -53,6 +74,7 @@ class VllmEngineMonitor:
logger.error(f"Traceback: {traceback.format_exc()}") logger.error(f"Traceback: {traceback.format_exc()}")
logger.error(f"vLLM AsyncLLM health check failed: {e}") logger.error(f"vLLM AsyncLLM health check failed: {e}")
logger.warning("Initiating Dynamo Runtime shutdown.") logger.warning("Initiating Dynamo Runtime shutdown.")
self._shutdown_engine()
self.runtime.shutdown() self.runtime.shutdown()
os._exit(1) os._exit(1)
except asyncio.CancelledError: except asyncio.CancelledError:
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment