Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
56498572
Unverified
Commit
56498572
authored
Aug 07, 2024
by
Robert Shaw
Committed by
GitHub
Aug 07, 2024
Browse files
[ BugFix ] Move `zmq` frontend to IPC instead of TCP (#7222)
parent
0f7052bc
Changes
5
Hide whitespace changes
Inline
Side-by-side
Showing
5 changed files
with
29 additions
and
22 deletions
+29
-22
vllm/entrypoints/openai/api_server.py
vllm/entrypoints/openai/api_server.py
+8
-4
vllm/entrypoints/openai/rpc/client.py
vllm/entrypoints/openai/rpc/client.py
+3
-3
vllm/entrypoints/openai/rpc/server.py
vllm/entrypoints/openai/rpc/server.py
+4
-6
vllm/envs.py
vllm/envs.py
+6
-5
vllm/utils.py
vllm/utils.py
+8
-4
No files found.
vllm/entrypoints/openai/api_server.py
View file @
56498572
...
...
@@ -43,7 +43,7 @@ from vllm.entrypoints.openai.serving_tokenization import (
OpenAIServingTokenization
)
from
vllm.logger
import
init_logger
from
vllm.usage.usage_lib
import
UsageContext
-from vllm.utils import FlexibleArgumentParser, get_open_port
+from vllm.utils import FlexibleArgumentParser, get_open_zmq_ipc_path
from
vllm.version
import
__version__
as
VLLM_VERSION
TIMEOUT_KEEP_ALIVE
=
5
# seconds
...
...
@@ -106,16 +106,20 @@ async def build_async_engine_client(args) -> AsyncIterator[AsyncEngineClient]:
# Otherwise, use the multiprocessing AsyncLLMEngine.
else
:
# Select random path for IPC.
rpc_path
=
get_open_zmq_ipc_path
()
logger
.
info
(
"Multiprocessing frontend to use %s for RPC Path."
,
rpc_path
)
# Start RPCServer in separate process (holds the AsyncLLMEngine).
port
=
get_open_port
(
envs
.
VLLM_RPC_PORT
)
rpc_server_process
=
Process
(
target
=
run_rpc_server
,
args
=
(
engine_args
,
UsageContext
.
OPENAI_API_SERVER
,
port
))
rpc_path
))
rpc_server_process
.
start
()
# Build RPCClient, which conforms to AsyncEngineClient Protocol.
async_engine_client
=
AsyncEngineRPCClient
(
port
)
async_engine_client
=
AsyncEngineRPCClient
(
rpc_path
)
await
async_engine_client
.
setup
()
try
:
...
...
vllm/entrypoints/openai/rpc/client.py
View file @
56498572
...
...
@@ -21,9 +21,9 @@ from vllm.transformers_utils.tokenizer_group import init_tokenizer_from_configs
class
AsyncEngineRPCClient
:
def
__init__
(
self
,
port
:
int
):
def
__init__
(
self
,
rpc_path
:
str
):
self
.
context
=
zmq
.
asyncio
.
Context
()
-        self.path = f"tcp://localhost:{port}"
+        self.rpc_path = rpc_path
async
def
setup
(
self
):
"""Setup the client before it starts sending server requests."""
...
...
@@ -58,7 +58,7 @@ class AsyncEngineRPCClient:
# to enable streaming.
socket
=
self
.
context
.
socket
(
zmq
.
constants
.
DEALER
)
try
:
-            socket.connect(self.path)
+            socket.connect(self.rpc_path)
yield
socket
finally
:
socket
.
close
()
...
...
vllm/entrypoints/openai/rpc/server.py
View file @
56498572
...
...
@@ -20,7 +20,7 @@ logger = init_logger(__name__)
class
AsyncEngineRPCServer
:
def
__init__
(
self
,
async_engine_args
:
AsyncEngineArgs
,
usage_context
:
UsageContext
,
port
:
int
):
usage_context
:
UsageContext
,
rpc_path
:
str
):
# Initialize engine first.
self
.
engine
=
AsyncLLMEngine
.
from_engine_args
(
async_engine_args
,
usage_context
)
...
...
@@ -30,9 +30,7 @@ class AsyncEngineRPCServer:
# Init socket for readiness state.
self
.
socket
=
self
.
context
.
socket
(
zmq
.
constants
.
ROUTER
)
-        # Note numeric form of localhost should be used for zmq bind(),
-        # see https://stackoverflow.com/a/8958414
-        self.socket.bind(f"tcp://127.0.0.1:{port}")
+        self.socket.bind(rpc_path)
def
cleanup
(
self
):
"""Cleanup all resources."""
...
...
@@ -213,6 +211,6 @@ async def run_server(server: AsyncEngineRPCServer):
def
run_rpc_server
(
async_engine_args
:
AsyncEngineArgs
,
usage_context
:
UsageContext
,
port
:
int
):
server
=
AsyncEngineRPCServer
(
async_engine_args
,
usage_context
,
port
)
usage_context
:
UsageContext
,
rpc_path
:
str
):
server
=
AsyncEngineRPCServer
(
async_engine_args
,
usage_context
,
rpc_path
)
asyncio
.
run
(
run_server
(
server
))
vllm/envs.py
View file @
56498572
import
os
import
tempfile
from
typing
import
TYPE_CHECKING
,
Any
,
Callable
,
Dict
,
Optional
if
TYPE_CHECKING
:
VLLM_HOST_IP
:
str
=
""
VLLM_PORT
:
Optional
[
int
]
=
None
-    VLLM_RPC_PORT: int = 5570
+    VLLM_RPC_BASE_PATH: str = tempfile.gettempdir()
VLLM_USE_MODELSCOPE
:
bool
=
False
VLLM_RINGBUFFER_WARNING_INTERVAL
:
int
=
60
VLLM_INSTANCE_ID
:
Optional
[
str
]
=
None
...
...
@@ -142,10 +143,10 @@ environment_variables: Dict[str, Callable[[], Any]] = {
lambda
:
int
(
os
.
getenv
(
'VLLM_PORT'
,
'0'
))
if
'VLLM_PORT'
in
os
.
environ
else
None
,
-    # used when the frontend api server is running in multi-processing mode,
-    # to communicate with the backend engine process over ZMQ.
-    'VLLM_RPC_PORT':
-    lambda: int(os.getenv('VLLM_RPC_PORT', '5570')),
+    # path used for ipc when the frontend api server is running in
+    # multi-processing mode to communicate with the backend engine process.
+    'VLLM_RPC_BASE_PATH':
+    lambda: os.getenv('VLLM_RPC_BASE_PATH', tempfile.gettempdir()),
# If true, will load models from ModelScope instead of Hugging Face Hub.
# note that the value is true or false, not numbers
...
...
vllm/utils.py
View file @
56498572
...
...
@@ -19,6 +19,7 @@ from platform import uname
from
typing
import
(
Any
,
AsyncGenerator
,
Awaitable
,
Callable
,
Dict
,
Generic
,
Hashable
,
List
,
Optional
,
OrderedDict
,
Set
,
Tuple
,
TypeVar
,
Union
,
overload
)
from
uuid
import
uuid4
import
numpy
as
np
import
numpy.typing
as
npt
...
...
@@ -484,10 +485,13 @@ def get_distributed_init_method(ip: str, port: int) -> str:
return
f
"tcp://[
{
ip
}
]:
{
port
}
"
if
":"
in
ip
else
f
"tcp://
{
ip
}
:
{
port
}
"
-def get_open_port(port: Optional[int] = None) -> int:
-    if port is None:
-        # Default behavior here is to return a port for multi-gpu communication
-        port = envs.VLLM_PORT
+def get_open_zmq_ipc_path() -> str:
+    base_rpc_path = envs.VLLM_RPC_BASE_PATH
+    return f"ipc://{base_rpc_path}/{uuid4()}"
+
+
+def get_open_port() -> int:
+    port = envs.VLLM_PORT
if
port
is
not
None
:
while
True
:
try
:
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment