Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
dynamo
Commits
49b7e930
Unverified
Commit
49b7e930
authored
Jun 17, 2025
by
Hongkuan Zhou
Committed by
GitHub
Jun 17, 2025
Browse files
feat: add graceful shutdown in vllm_1 (#1562)
parent
44c5be7e
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
19 additions
and
7 deletions
+19
-7
examples/vllm_v1/components/worker.py
examples/vllm_v1/components/worker.py
+19
-7
No files found.
examples/vllm_v1/components/worker.py
View file @
49b7e930
...
@@ -34,7 +34,7 @@ from vllm.v1.engine.core import EngineCoreProc
...
@@ -34,7 +34,7 @@ from vllm.v1.engine.core import EngineCoreProc
from
vllm.v1.engine.core_client
import
CoreEngineProcManager
from
vllm.v1.engine.core_client
import
CoreEngineProcManager
from
vllm.v1.executor.abstract
import
Executor
from
vllm.v1.executor.abstract
import
Executor
from
dynamo.sdk
import
async_on_start
,
endpoint
,
service
from
dynamo.sdk
import
async_on_start
,
dynamo_context
,
endpoint
,
service
logger
=
logging
.
getLogger
(
__name__
)
logger
=
logging
.
getLogger
(
__name__
)
...
@@ -44,8 +44,8 @@ class VllmBaseWorker:
...
@@ -44,8 +44,8 @@ class VllmBaseWorker:
class_name
=
self
.
__class__
.
__name__
class_name
=
self
.
__class__
.
__name__
self
.
engine_args
=
parse_vllm_args
(
class_name
,
""
)
self
.
engine_args
=
parse_vllm_args
(
class_name
,
""
)
signal
.
signal
(
signal
.
SIGTERM
,
self
.
shutdown_vllm_engine
)
signal
.
signal
(
signal
.
SIGTERM
,
self
.
graceful_shutdown
)
signal
.
signal
(
signal
.
SIGINT
,
self
.
shutdown_vllm_engine
)
signal
.
signal
(
signal
.
SIGINT
,
self
.
graceful_shutdown
)
self
.
set_side_channel_host_and_port
()
self
.
set_side_channel_host_and_port
()
...
@@ -60,9 +60,21 @@ class VllmBaseWorker:
...
@@ -60,9 +60,21 @@ class VllmBaseWorker:
logger
.
info
(
"VllmWorker has been initialized"
)
logger
.
info
(
"VllmWorker has been initialized"
)
def
shutdown_vllm_engine
(
self
,
signum
,
frame
):
def
graceful_shutdown
(
self
,
signum
,
frame
):
"""Shutdown the background loop"""
"""
logger
.
info
(
f
"Received signal
{
signum
}
, shutting down"
)
Gracefully shutdown the worker by shutting down the dynamo runtime.
This will
1. disable the generate endpoint so no new requests are accepted.
2. wait until all in-flight requests are completed.
3. finish the awaiting for the endpoint service.
4. rely on python's garbage collection to clean up the GPU.
"""
logger
.
info
(
"Shutting down dynamo runtime..."
)
dynamo_context
[
"runtime"
].
shutdown
()
logger
.
info
(
"Dynamo runtime shutdown complete."
)
def
shutdown_vllm_worker
(
self
,
signum
,
frame
):
"""Shutdown the worker immediately by killing the background loop"""
loop
=
asyncio
.
get_event_loop
()
loop
=
asyncio
.
get_event_loop
()
try
:
try
:
self
.
engine_client
.
close
()
self
.
engine_client
.
close
()
...
@@ -100,7 +112,7 @@ class VllmBaseWorker:
...
@@ -100,7 +112,7 @@ class VllmBaseWorker:
This sets the port number for the side channel.
This sets the port number for the side channel.
"""
"""
if
hostname
is
None
:
if
hostname
is
None
:
hostname
=
socket
.
gethostname
()
hostname
=
"127.0.0.1"
if
port
is
None
:
if
port
is
None
:
with
socket
.
socket
(
socket
.
AF_INET
,
socket
.
SOCK_STREAM
)
as
s
:
with
socket
.
socket
(
socket
.
AF_INET
,
socket
.
SOCK_STREAM
)
as
s
:
s
.
bind
((
""
,
0
))
# Bind to a free port provided by the host.
s
.
bind
((
""
,
0
))
# Bind to a free port provided by the host.
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment