Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
ccede2b2
Unverified
Commit
ccede2b2
authored
Dec 11, 2024
by
Russell Bryant
Committed by
GitHub
Dec 11, 2024
Browse files
[Core] cleanup zmq ipc sockets on exit (#11115)
Signed-off-by:
Russell Bryant
<
rbryant@redhat.com
>
parent
24a36d6d
Changes
4
Show whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
57 additions
and
17 deletions
+57
-17
vllm/entrypoints/openai/api_server.py
vllm/entrypoints/openai/api_server.py
+9
-0
vllm/v1/engine/core.py
vllm/v1/engine/core.py
+14
-2
vllm/v1/engine/core_client.py
vllm/v1/engine/core_client.py
+20
-8
vllm/v1/executor/multiproc_executor.py
vllm/v1/executor/multiproc_executor.py
+14
-7
No files found.
vllm/entrypoints/openai/api_server.py
View file @
ccede2b2
import
asyncio
import
atexit
import
importlib
import
inspect
import
multiprocessing
...
...
@@ -196,6 +197,14 @@ async def build_async_engine_client_from_engine_args(
assert
engine_pid
is
not
None
,
"Engine process failed to start."
logger
.
info
(
"Started engine process with PID %d"
,
engine_pid
)
def
_cleanup_ipc_path
():
socket_path
=
ipc_path
.
replace
(
"ipc://"
,
""
)
if
os
.
path
.
exists
(
socket_path
):
os
.
remove
(
socket_path
)
# Ensure we clean up the local IPC socket file on exit.
atexit
.
register
(
_cleanup_ipc_path
)
# Build RPCClient, which conforms to EngineClient Protocol.
engine_config
=
engine_args
.
create_engine_config
()
build_client
=
partial
(
MQLLMEngineClient
,
ipc_path
,
engine_config
,
...
...
vllm/v1/engine/core.py
View file @
ccede2b2
...
...
@@ -4,6 +4,7 @@ import queue
import
signal
import
threading
import
time
from
dataclasses
import
dataclass
from
multiprocessing.process
import
BaseProcess
from
typing
import
List
,
Tuple
,
Type
,
Union
...
...
@@ -129,6 +130,14 @@ class EngineCore:
self
.
model_executor
.
profile
(
is_start
)
@dataclass
class EngineCoreProcHandle:
    """Handle returned by ``EngineCoreProc.make_engine_core_process``.

    Bundles the spawned background process with the three paths that
    were passed in when it was created, so the owner can later
    terminate the process and remove the matching zmq ipc socket files.
    """
    # The background process running the engine core.
    proc: BaseProcess
    # Path handed to wait_for_startup() together with ``proc``.
    ready_path: str
    # Path the client binds its input socket to.
    input_path: str
    # Path passed as ``output_path`` when the process was created.
    output_path: str
class
EngineCoreProc
(
EngineCore
):
"""ZMQ-wrapper for running EngineCore in background process."""
...
...
@@ -200,7 +209,7 @@ class EngineCoreProc(EngineCore):
input_path
:
str
,
output_path
:
str
,
ready_path
:
str
,
)
->
BaseProcess
:
)
->
EngineCoreProcHandle
:
# The current process might have CUDA context,
# so we need to spawn a new process.
# NOTE(rob): this is a problem for using EngineCoreProc w/
...
...
@@ -222,7 +231,10 @@ class EngineCoreProc(EngineCore):
# Wait for startup
EngineCoreProc
.
wait_for_startup
(
proc
,
ready_path
)
return
proc
return
EngineCoreProcHandle
(
proc
=
proc
,
ready_path
=
ready_path
,
input_path
=
input_path
,
output_path
=
output_path
)
@
staticmethod
def
run_engine_core
(
*
args
,
**
kwargs
):
...
...
vllm/v1/engine/core_client.py
View file @
ccede2b2
import
atexit
import
os
from
typing
import
List
,
Union
import
msgspec
...
...
@@ -148,7 +149,7 @@ class MPClient(EngineCoreClient):
self
.
input_socket
.
bind
(
input_path
)
# Start EngineCore in background process.
self
.
proc
=
EngineCoreProc
.
make_engine_core_process
(
self
.
proc
_handle
=
EngineCoreProc
.
make_engine_core_process
(
*
args
,
input_path
=
input_path
,
output_path
=
output_path
,
...
...
@@ -161,13 +162,24 @@ class MPClient(EngineCoreClient):
# Shut down the zmq context.
self
.
ctx
.
destroy
(
linger
=
0
)
if
hasattr
(
self
,
"proc_handle"
):
# Shutdown the process if needed.
if
hasattr
(
self
,
"proc"
)
and
self
.
proc
.
is_alive
():
self
.
proc
.
terminate
()
self
.
proc
.
join
(
5
)
if
self
.
proc
.
is_alive
():
kill_process_tree
(
self
.
proc
.
pid
)
if
self
.
proc_handle
.
proc
.
is_alive
():
self
.
proc_handle
.
proc
.
terminate
()
self
.
proc_handle
.
proc
.
join
(
5
)
if
self
.
proc_handle
.
proc
.
is_alive
():
kill_process_tree
(
self
.
proc_handle
.
proc
.
pid
)
# Remove zmq ipc socket files
ipc_sockets
=
[
self
.
proc_handle
.
ready_path
,
self
.
proc_handle
.
output_path
,
self
.
proc_handle
.
input_path
]
for
ipc_socket
in
ipc_sockets
:
socket_file
=
ipc_socket
.
replace
(
"ipc://"
,
""
)
if
os
.
path
.
exists
(
socket_file
):
os
.
remove
(
socket_file
)
def
__del__
(
self
):
self
.
shutdown
()
...
...
vllm/v1/executor/multiproc_executor.py
View file @
ccede2b2
...
...
@@ -172,17 +172,24 @@ class MultiprocExecutor:
# Send SIGTERM if still running
active_procs
=
[
w
.
proc
for
w
in
self
.
workers
if
w
.
proc
.
is_alive
()]
self
.
workers
=
None
for
p
in
active_procs
:
p
.
terminate
()
if
wait_for_termination
(
active_procs
,
4
):
return
if
not
wait_for_termination
(
active_procs
,
4
):
# Send SIGKILL if still running
active_procs
=
[
p
for
p
in
active_procs
if
p
.
is_alive
()]
for
p
in
active_procs
:
p
.
kill
()
self
.
_cleanup_sockets
()
self
.
workers
=
None
def
_cleanup_sockets
(
self
):
for
w
in
self
.
workers
:
# Remove the zmq ipc socket file
socket_path
=
w
.
ready_path
.
replace
(
"ipc://"
,
""
)
if
os
.
path
.
exists
(
socket_path
):
os
.
remove
(
socket_path
)
def
shutdown
(
self
):
"""Properly shut down the executor and its workers"""
if
(
hasattr
(
self
,
'workers'
)
and
self
.
workers
is
not
None
):
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment