Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
b82662d9
Unverified
Commit
b82662d9
authored
Mar 15, 2025
by
Nick Hill
Committed by
GitHub
Mar 15, 2025
Browse files
[BugFix] Fix torch distributed stateless PG backend init (#14870)
Signed-off-by:
Nick Hill
<
nhill@redhat.com
>
parent
71c1e071
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
8 additions
and
3 deletions
+8
-3
examples/offline_inference/data_parallel.py
examples/offline_inference/data_parallel.py
+5
-0
vllm/distributed/utils.py
vllm/distributed/utils.py
+3
-3
No files found.
examples/offline_inference/data_parallel.py
View file @
b82662d9
...
...
@@ -76,5 +76,10 @@ if __name__ == "__main__":
GPUs_per_dp_rank
))
proc
.
start
()
procs
.
append
(
proc
)
exit_code
=
0
for
proc
in
procs
:
proc
.
join
()
if
proc
.
exitcode
:
exit_code
=
proc
.
exitcode
exit
(
exit_code
)
vllm/distributed/utils.py
View file @
b82662d9
...
...
@@ -299,13 +299,10 @@ def stateless_init_torch_distributed_process_group(
# different systems (e.g. RPC) in case the store is multi-tenant.
prefix_store
=
PrefixStore
(
init_method
,
store
)
pg_options
=
ProcessGroup
.
Options
(
backend
=
backend
,
timeout
=
timeout
)
pg
:
ProcessGroup
=
ProcessGroup
(
prefix_store
,
group_rank
,
group_size
,
pg_options
,
)
if
backend
==
"gloo"
:
...
...
@@ -327,7 +324,10 @@ def stateless_init_torch_distributed_process_group(
backend_options
)
backend_type
=
ProcessGroup
.
BackendType
.
NCCL
device
=
torch
.
device
(
"cuda"
)
else
:
raise
RuntimeError
(
f
"Unsupported torch distributed backend:
{
backend
}
"
)
pg
.
_set_default_backend
(
backend_type
)
backend_class
.
_set_sequence_number_for_group
()
pg
.
_register_backend
(
device
,
backend_type
,
backend_class
)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment