Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
73e688cb
Unverified
Commit
73e688cb
authored
Sep 09, 2025
by
Charlie Fu
Committed by
GitHub
Sep 09, 2025
Browse files
[ROCm][Feature] Enable Pipeline Parallelism with Ray Compiled Graph on ROCm (#24275)
Signed-off-by:
charlifu
<
charlifu@amd.com
>
parent
fb1a8f93
Changes
3
Show whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
16 additions
and
3 deletions
+16
-3
docker/Dockerfile.rocm
docker/Dockerfile.rocm
+1
-0
requirements/rocm.txt
requirements/rocm.txt
+1
-1
vllm/utils/__init__.py
vllm/utils/__init__.py
+14
-2
No files found.
docker/Dockerfile.rocm
View file @
73e688cb
...
@@ -104,6 +104,7 @@ COPY --from=export_vllm /examples ${COMMON_WORKDIR}/vllm/examples
...
@@ -104,6 +104,7 @@ COPY --from=export_vllm /examples ${COMMON_WORKDIR}/vllm/examples
COPY --from=export_vllm /docker ${COMMON_WORKDIR}/vllm/docker
COPY --from=export_vllm /docker ${COMMON_WORKDIR}/vllm/docker
ENV RAY_EXPERIMENTAL_NOSET_ROCR_VISIBLE_DEVICES=1
ENV RAY_EXPERIMENTAL_NOSET_ROCR_VISIBLE_DEVICES=1
ENV RAY_EXPERIMENTAL_NOSET_HIP_VISIBLE_DEVICES=1
ENV TOKENIZERS_PARALLELISM=false
ENV TOKENIZERS_PARALLELISM=false
# ENV that can improve safe tensor loading, and end-to-end time
# ENV that can improve safe tensor loading, and end-to-end time
...
...
requirements/rocm.txt
View file @
73e688cb
...
@@ -8,7 +8,7 @@ numba == 0.61.2; python_version > '3.9'
...
@@ -8,7 +8,7 @@ numba == 0.61.2; python_version > '3.9'
boto3
boto3
botocore
botocore
datasets
datasets
ray
>=2.10.0,<2.45.0
ray
[cgraph]>=2.48.0 # Ray Compiled Graph, required for pipeline parallelism in V1.
peft
peft
pytest-asyncio
pytest-asyncio
tensorizer==2.10.1
tensorizer==2.10.1
...
...
vllm/utils/__init__.py
View file @
73e688cb
...
@@ -78,6 +78,7 @@ if TYPE_CHECKING:
...
@@ -78,6 +78,7 @@ if TYPE_CHECKING:
from
argparse
import
Namespace
from
argparse
import
Namespace
from
vllm.config
import
ModelConfig
,
VllmConfig
from
vllm.config
import
ModelConfig
,
VllmConfig
from
vllm.sequence
import
IntermediateTensors
logger
=
init_logger
(
__name__
)
logger
=
init_logger
(
__name__
)
...
@@ -1472,7 +1473,8 @@ def current_stream() -> torch.cuda.Stream:
...
@@ -1472,7 +1473,8 @@ def current_stream() -> torch.cuda.Stream:
# is hurting performance. Therefore creating a dedicated stream
# is hurting performance. Therefore creating a dedicated stream
# per process
# per process
if
current_platform
.
is_rocm
():
if
current_platform
.
is_rocm
():
_current_stream_tls
.
value
=
torch
.
cuda
.
Stream
()
# torch.cuda.set_stream here is an alias of _patched_set_stream
torch
.
cuda
.
set_stream
(
torch
.
cuda
.
Stream
())
elif
current_platform
.
is_cpu
():
elif
current_platform
.
is_cpu
():
_current_stream_tls
.
value
=
_StreamPlaceholder
()
_current_stream_tls
.
value
=
_StreamPlaceholder
()
else
:
else
:
...
@@ -2278,7 +2280,8 @@ def weak_ref_tensor(tensor: Any) -> Any:
...
@@ -2278,7 +2280,8 @@ def weak_ref_tensor(tensor: Any) -> Any:
def
weak_ref_tensors
(
def
weak_ref_tensors
(
tensors
:
Union
[
torch
.
Tensor
,
list
[
torch
.
Tensor
],
tuple
[
torch
.
Tensor
]]
tensors
:
Union
[
torch
.
Tensor
,
list
[
torch
.
Tensor
],
tuple
[
torch
.
Tensor
],
IntermediateTensors
]
)
->
Union
[
torch
.
Tensor
,
list
[
Any
],
tuple
[
Any
],
Any
]:
)
->
Union
[
torch
.
Tensor
,
list
[
Any
],
tuple
[
Any
],
Any
]:
"""
"""
Convenience function to create weak references to tensors,
Convenience function to create weak references to tensors,
...
@@ -2290,6 +2293,15 @@ def weak_ref_tensors(
...
@@ -2290,6 +2293,15 @@ def weak_ref_tensors(
return
[
weak_ref_tensor
(
t
)
for
t
in
tensors
]
return
[
weak_ref_tensor
(
t
)
for
t
in
tensors
]
if
isinstance
(
tensors
,
tuple
):
if
isinstance
(
tensors
,
tuple
):
return
tuple
(
weak_ref_tensor
(
t
)
for
t
in
tensors
)
return
tuple
(
weak_ref_tensor
(
t
)
for
t
in
tensors
)
# For IntermediateTensors used in pipeline parallelism
from
vllm.sequence
import
IntermediateTensors
if
isinstance
(
tensors
,
IntermediateTensors
):
ret
=
IntermediateTensors
({
key
:
weak_ref_tensor
(
val
)
for
key
,
val
in
tensors
.
tensors
.
items
()
})
return
ret
raise
ValueError
(
"Invalid type for tensors"
)
raise
ValueError
(
"Invalid type for tensors"
)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment