Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
norm
vllm
Commits
6d7d95a7
"vscode:/vscode.git/clone" did not exist on "3a278d701d3a0bba25ad52891653330ece2cb472"
Unverified
Commit
6d7d95a7
authored
Jul 16, 2023
by
Zhangir Azerbayev
Committed by
GitHub
Jul 15, 2023
Browse files
Offload port selection to OS (#467)
parent
96853af5
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
9 additions
and
3 deletions
+9
-3
vllm/engine/ray_utils.py
vllm/engine/ray_utils.py
+9
-3
No files found.
vllm/engine/ray_utils.py
View file @
6d7d95a7
import
random
import
socket
from
typing
import
List
,
Optional
,
Tuple
from
typing
import
List
,
Optional
,
Tuple
try
:
try
:
...
@@ -12,6 +12,12 @@ from vllm.config import ParallelConfig
...
@@ -12,6 +12,12 @@ from vllm.config import ParallelConfig
DeviceID
=
Tuple
[
int
,
Optional
[
str
],
int
]
DeviceID
=
Tuple
[
int
,
Optional
[
str
],
int
]
def get_open_port():
    """Ask the OS for a currently free TCP port and return its number.

    Binding to port 0 makes the kernel pick an unused ephemeral port;
    we read it back via getsockname() and release the socket immediately.
    """
    probe = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    try:
        probe.bind(("", 0))
        return probe.getsockname()[1]
    finally:
        probe.close()
def
initialize_cluster
(
def
initialize_cluster
(
parallel_config
:
ParallelConfig
,
parallel_config
:
ParallelConfig
,
engine_use_ray
:
bool
=
False
,
engine_use_ray
:
bool
=
False
,
...
@@ -42,7 +48,7 @@ def initialize_cluster(
...
@@ -42,7 +48,7 @@ def initialize_cluster(
if
not
parallel_config
.
worker_use_ray
:
if
not
parallel_config
.
worker_use_ray
:
# Initialize cluster locally.
# Initialize cluster locally.
port
=
random
.
randint
(
10000
,
20000
)
port
=
get_open_port
(
)
# We need to setup the distributed init method to make sure
# We need to setup the distributed init method to make sure
# the distributed megatron code (e.g., get world size) works correctly.
# the distributed megatron code (e.g., get world size) works correctly.
distributed_init_method
=
f
"tcp://localhost:
{
port
}
"
distributed_init_method
=
f
"tcp://localhost:
{
port
}
"
...
@@ -96,7 +102,7 @@ def initialize_cluster(
...
@@ -96,7 +102,7 @@ def initialize_cluster(
stage_devices
.
append
((
rank
,
node_resource
,
current_device_id
))
stage_devices
.
append
((
rank
,
node_resource
,
current_device_id
))
if
distributed_init_method
is
None
:
if
distributed_init_method
is
None
:
ip
=
node_resource
.
split
(
"node:"
)[
-
1
]
ip
=
node_resource
.
split
(
"node:"
)[
-
1
]
port
=
random
.
randint
(
10000
,
20000
)
port
=
get_open_port
(
)
distributed_init_method
=
f
"tcp://
{
ip
}
:
{
port
}
"
distributed_init_method
=
f
"tcp://
{
ip
}
:
{
port
}
"
rank
+=
1
rank
+=
1
current_device_id
+=
1
current_device_id
+=
1
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment