Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
dynamo
Commits
162065fe
Unverified
Commit
162065fe
authored
Oct 06, 2025
by
Waël Boukhobza
Committed by
GitHub
Oct 06, 2025
Browse files
fix: vLLM engine graceful shutdown to prevent VRAM leaks on crash (#2898)
Signed-off-by:
Wael Boukhobza
<
wawa_wael@live.fr
>
parent
ede0da16
Changes
1
Show whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
22 additions
and
0 deletions
+22
-0
components/src/dynamo/vllm/engine_monitor.py
components/src/dynamo/vllm/engine_monitor.py
+22
-0
No files found.
components/src/dynamo/vllm/engine_monitor.py
View file @
162065fe
...
@@ -4,6 +4,7 @@
...
@@ -4,6 +4,7 @@
import
asyncio
import
asyncio
import
logging
import
logging
import
os
import
os
import
signal
import
traceback
import
traceback
from
vllm.v1.engine.async_llm
import
AsyncLLM
from
vllm.v1.engine.async_llm
import
AsyncLLM
...
@@ -16,6 +17,7 @@ configure_dynamo_logging
...
@@ -16,6 +17,7 @@ configure_dynamo_logging
logger
=
logging
.
getLogger
(
__name__
)
logger
=
logging
.
getLogger
(
__name__
)
HEALTH_CHECK_INTERVAL
=
2
HEALTH_CHECK_INTERVAL
=
2
ENGINE_SHUTDOWN_TIMEOUT
=
30
# seconds
class
VllmEngineMonitor
:
class
VllmEngineMonitor
:
...
@@ -44,6 +46,25 @@ class VllmEngineMonitor:
...
@@ -44,6 +46,25 @@ class VllmEngineMonitor:
def
__del__
(
self
):
def
__del__
(
self
):
self
.
_monitor_task
.
cancel
()
self
.
_monitor_task
.
cancel
()
def _shutdown_engine(self):
    """
    Shutdown the vLLM engine on crash scenarios to free resources.

    Calls ``self.engine_client.shutdown()``. When a SIGALRM handler can be
    installed, the call is bounded by ``ENGINE_SHUTDOWN_TIMEOUT`` seconds;
    otherwise the shutdown is still attempted, just without a timeout.

    Failures (including the timeout) are logged as warnings and never
    propagated, so the caller's remaining cleanup always runs.
    """

    def timeout_handler(signum, frame):
        raise TimeoutError("Engine shutdown timed out")

    # SIGALRM does not exist on every platform; look it up defensively so a
    # missing attribute cannot abort the shutdown path.
    sigalrm = getattr(signal, "SIGALRM", None)
    previous_handler = None
    handler_installed = False

    if sigalrm is not None:
        try:
            previous_handler = signal.signal(sigalrm, timeout_handler)
            handler_installed = True
        except ValueError:
            # signal.signal() raises ValueError outside the main thread of
            # the main interpreter. Still attempt the shutdown below.
            logger.warning(
                "SIGALRM timeout unavailable in this thread; "
                "shutting down vLLM engine without a timeout."
            )

    try:
        if handler_installed:
            signal.alarm(ENGINE_SHUTDOWN_TIMEOUT)
        self.engine_client.shutdown()
    except Exception as e:
        logger.warning(f"vLLM engine shutdown failed: {e}")
    finally:
        if handler_installed:
            # Cancel any pending alarm before touching the handler.
            signal.alarm(0)
            # signal.signal() returns None when the previous handler was not
            # installed from Python; None cannot be passed back, so only
            # restore handlers that can be re-installed.
            if previous_handler is not None:
                signal.signal(sigalrm, previous_handler)
async
def
_check_engine_health
(
self
):
async
def
_check_engine_health
(
self
):
while
True
:
while
True
:
try
:
try
:
...
@@ -53,6 +74,7 @@ class VllmEngineMonitor:
...
@@ -53,6 +74,7 @@ class VllmEngineMonitor:
logger
.
error
(
f
"Traceback:
{
traceback
.
format_exc
()
}
"
)
logger
.
error
(
f
"Traceback:
{
traceback
.
format_exc
()
}
"
)
logger
.
error
(
f
"vLLM AsyncLLM health check failed:
{
e
}
"
)
logger
.
error
(
f
"vLLM AsyncLLM health check failed:
{
e
}
"
)
logger
.
warning
(
"Initiating Dynamo Runtime shutdown."
)
logger
.
warning
(
"Initiating Dynamo Runtime shutdown."
)
self
.
_shutdown_engine
()
self
.
runtime
.
shutdown
()
self
.
runtime
.
shutdown
()
os
.
_exit
(
1
)
os
.
_exit
(
1
)
except
asyncio
.
CancelledError
:
except
asyncio
.
CancelledError
:
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment