Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
3eea7488
Unverified
Commit
3eea7488
authored
Jun 19, 2024
by
youkaichao
Committed by
GitHub
Jun 19, 2024
Browse files
[misc][distributed] use 127.0.0.1 for single-node (#5619)
parent
f758aed0
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
15 additions
and
2 deletions
+15
-2
vllm/executor/multiproc_gpu_executor.py
vllm/executor/multiproc_gpu_executor.py
+5
-2
vllm/executor/ray_gpu_executor.py
vllm/executor/ray_gpu_executor.py
+10
-0
No files found.
vllm/executor/multiproc_gpu_executor.py
View file @
3eea7488
...
...
@@ -10,7 +10,7 @@ from vllm.executor.multiproc_worker_utils import (ProcessWorkerWrapper,
from
vllm.logger
import
init_logger
from
vllm.sequence
import
ExecuteModelRequest
,
SamplerOutput
from
vllm.utils
import
(
cuda_device_count_stateless
,
get_distributed_init_method
,
get_ip
,
get_open_port
,
get_distributed_init_method
,
get_open_port
,
get_vllm_instance_id
,
make_async
)
logger
=
init_logger
(
__name__
)
...
...
@@ -37,8 +37,11 @@ class MultiprocessingGPUExecutor(DistributedGPUExecutor):
assert
world_size
<=
cuda_device_count_stateless
(),
(
"please set tensor_parallel_size to less than max local gpu count"
)
# Multiprocessing-based executor does not support multi-node setting.
# Since it only works for single node, we can use the loopback address
# 127.0.0.1 for communication.
distributed_init_method
=
get_distributed_init_method
(
get_ip
()
,
get_open_port
())
"127.0.0.1"
,
get_open_port
())
if
world_size
==
1
:
self
.
workers
=
[]
...
...
vllm/executor/ray_gpu_executor.py
View file @
3eea7488
...
...
@@ -161,6 +161,16 @@ class RayGPUExecutor(DistributedGPUExecutor):
self
.
_run_workers
(
"update_environment_variables"
,
all_args
=
all_args_to_update_environment_variables
)
if
len
(
node_gpus
)
==
1
:
# in single node case, we don't need to get the IP address.
# the loopback address is sufficient
# NOTE: a node may have several IP addresses, one for each
# network interface. `get_ip()` might return any of them,
# while they might not work for communication inside the node
# if the network setup is complicated. Using the loopback address
# solves this issue, as it always works for communication inside
# the node.
driver_ip
=
"127.0.0.1"
distributed_init_method
=
get_distributed_init_method
(
driver_ip
,
get_open_port
())
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment