Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
b6c16cf8
Unverified
Commit
b6c16cf8
authored
Jul 12, 2024
by
Hongxia Yang
Committed by
GitHub
Jul 11, 2024
Browse files
[ROCm][AMD] unify CUDA_VISIBLE_DEVICES usage in cuda/rocm (#6352)
parent
d26a8b3f
Changes
5
Show whitespace changes
Inline
Side-by-side
Showing
5 changed files
with
10 additions
and
34 deletions
+10
-34
Dockerfile.rocm
Dockerfile.rocm
+7
-7
tests/distributed/test_utils.py
tests/distributed/test_utils.py
+1
-6
vllm/config.py
vllm/config.py
+1
-8
vllm/utils.py
vllm/utils.py
+0
-4
vllm/worker/worker_base.py
vllm/worker/worker_base.py
+1
-9
No files found.
Dockerfile.rocm
View file @
b6c16cf8
...
@@ -52,25 +52,25 @@ RUN pip install --upgrade pip
...
@@ -52,25 +52,25 @@ RUN pip install --upgrade pip
# Remove sccache so it doesn't interfere with ccache
# Remove sccache so it doesn't interfere with ccache
# TODO: implement sccache support across components
# TODO: implement sccache support across components
RUN apt-get purge -y sccache; pip uninstall -y sccache; rm -f "$(which sccache)"
RUN apt-get purge -y sccache; pip uninstall -y sccache; rm -f "$(which sccache)"
# Install torch == 2.
4
.0 on ROCm
# Install torch == 2.
5
.0 on ROCm
RUN case "$(ls /opt | grep -Po 'rocm-[0-9]\.[0-9]')" in \
RUN case "$(ls /opt | grep -Po 'rocm-[0-9]\.[0-9]')" in \
*"rocm-5.7"*) \
*"rocm-5.7"*) \
pip uninstall -y torch torchaudio torchvision \
pip uninstall -y torch torchaudio torchvision \
&& pip install --no-cache-dir --pre \
&& pip install --no-cache-dir --pre \
torch==2.
4
.0.dev20240
612
torchaudio==2.4.0.dev20240
612
\
torch==2.
5
.0.dev20240
710
torchaudio==2.4.0.dev20240
710
\
torchvision==0.
19
.0.dev20240
612
\
torchvision==0.
20
.0.dev20240
710
\
--index-url https://download.pytorch.org/whl/nightly/rocm5.7;; \
--index-url https://download.pytorch.org/whl/nightly/rocm5.7;; \
*"rocm-6.0"*) \
*"rocm-6.0"*) \
pip uninstall -y torch torchaudio torchvision \
pip uninstall -y torch torchaudio torchvision \
&& pip install --no-cache-dir --pre \
&& pip install --no-cache-dir --pre \
torch==2.
4
.0.dev20240
612
torchaudio==2.4.0.dev20240
612
\
torch==2.
5
.0.dev20240
710
torchaudio==2.4.0.dev20240
710
\
torchvision==0.
19
.0.dev20240
612
\
torchvision==0.
20
.0.dev20240
710
\
--index-url https://download.pytorch.org/whl/nightly/rocm6.0;; \
--index-url https://download.pytorch.org/whl/nightly/rocm6.0;; \
*"rocm-6.1"*) \
*"rocm-6.1"*) \
pip uninstall -y torch torchaudio torchvision \
pip uninstall -y torch torchaudio torchvision \
&& pip install --no-cache-dir --pre \
&& pip install --no-cache-dir --pre \
torch==2.
4
.0.dev20240
612
torchaudio==2.4.0.dev20240
612
\
torch==2.
5
.0.dev20240
710
torchaudio==2.4.0.dev20240
710
\
torchvision==0.
19
.0.dev20240
612
\
torchvision==0.
20
.0.dev20240
710
\
--index-url https://download.pytorch.org/whl/nightly/rocm6.1;; \
--index-url https://download.pytorch.org/whl/nightly/rocm6.1;; \
*) ;; esac
*) ;; esac
...
...
tests/distributed/test_utils.py
View file @
b6c16cf8
import
ray
import
ray
import
vllm.envs
as
envs
import
vllm.envs
as
envs
from
vllm.utils
import
(
cuda_device_count_stateless
,
is_hip
,
from
vllm.utils
import
(
cuda_device_count_stateless
,
update_environment_variables
)
update_environment_variables
)
...
@@ -22,11 +22,6 @@ class _CUDADeviceCountStatelessTestActor:
...
@@ -22,11 +22,6 @@ class _CUDADeviceCountStatelessTestActor:
def
test_cuda_device_count_stateless
():
def
test_cuda_device_count_stateless
():
"""Test that cuda_device_count_stateless changes return value if
"""Test that cuda_device_count_stateless changes return value if
CUDA_VISIBLE_DEVICES is changed."""
CUDA_VISIBLE_DEVICES is changed."""
if
is_hip
():
# Set HIP_VISIBLE_DEVICES == CUDA_VISIBLE_DEVICES. Conversion
# is handled by `update_environment_variables`
update_environment_variables
(
{
"CUDA_VISIBLE_DEVICES"
:
envs
.
CUDA_VISIBLE_DEVICES
})
actor
=
_CUDADeviceCountStatelessTestActor
.
options
(
# type: ignore
actor
=
_CUDADeviceCountStatelessTestActor
.
options
(
# type: ignore
num_gpus
=
2
).
remote
()
num_gpus
=
2
).
remote
()
assert
sorted
(
ray
.
get
(
assert
sorted
(
ray
.
get
(
...
...
vllm/config.py
View file @
b6c16cf8
...
@@ -6,7 +6,6 @@ from typing import TYPE_CHECKING, ClassVar, List, Optional, Tuple, Union
...
@@ -6,7 +6,6 @@ from typing import TYPE_CHECKING, ClassVar, List, Optional, Tuple, Union
import
torch
import
torch
from
transformers
import
PretrainedConfig
from
transformers
import
PretrainedConfig
import
vllm.envs
as
envs
from
vllm.logger
import
init_logger
from
vllm.logger
import
init_logger
from
vllm.model_executor.layers.quantization
import
QUANTIZATION_METHODS
from
vllm.model_executor.layers.quantization
import
QUANTIZATION_METHODS
from
vllm.model_executor.models
import
ModelRegistry
from
vllm.model_executor.models
import
ModelRegistry
...
@@ -14,7 +13,7 @@ from vllm.tracing import is_otel_installed
...
@@ -14,7 +13,7 @@ from vllm.tracing import is_otel_installed
from
vllm.transformers_utils.config
import
get_config
,
get_hf_text_config
from
vllm.transformers_utils.config
import
get_config
,
get_hf_text_config
from
vllm.utils
import
(
cuda_device_count_stateless
,
get_cpu_memory
,
is_cpu
,
from
vllm.utils
import
(
cuda_device_count_stateless
,
get_cpu_memory
,
is_cpu
,
is_hip
,
is_neuron
,
is_openvino
,
is_tpu
,
is_xpu
,
is_hip
,
is_neuron
,
is_openvino
,
is_tpu
,
is_xpu
,
print_warning_once
,
update_environment_variables
)
print_warning_once
)
if
TYPE_CHECKING
:
if
TYPE_CHECKING
:
from
ray.util.placement_group
import
PlacementGroup
from
ray.util.placement_group
import
PlacementGroup
...
@@ -695,12 +694,6 @@ class ParallelConfig:
...
@@ -695,12 +694,6 @@ class ParallelConfig:
self
.
distributed_executor_backend
=
backend
self
.
distributed_executor_backend
=
backend
logger
.
info
(
"Defaulting to use %s for distributed inference"
,
logger
.
info
(
"Defaulting to use %s for distributed inference"
,
backend
)
backend
)
# If CUDA_VISIBLE_DEVICES is set on ROCm prior to vLLM init,
# propagate changes to HIP_VISIBLE_DEVICES (conversion handled by
# the update_environment_variables function)
if
is_hip
()
and
envs
.
CUDA_VISIBLE_DEVICES
:
update_environment_variables
(
{
"CUDA_VISIBLE_DEVICES"
:
envs
.
CUDA_VISIBLE_DEVICES
})
self
.
_verify_args
()
self
.
_verify_args
()
self
.
rank
=
0
self
.
rank
=
0
...
...
vllm/utils.py
View file @
b6c16cf8
...
@@ -386,10 +386,6 @@ def get_open_port() -> int:
...
@@ -386,10 +386,6 @@ def get_open_port() -> int:
def
update_environment_variables
(
envs
:
Dict
[
str
,
str
]):
def
update_environment_variables
(
envs
:
Dict
[
str
,
str
]):
if
is_hip
()
and
"CUDA_VISIBLE_DEVICES"
in
envs
:
# Propagate changes to CUDA_VISIBLE_DEVICES to
# ROCm's HIP_VISIBLE_DEVICES as well
envs
[
"HIP_VISIBLE_DEVICES"
]
=
envs
[
"CUDA_VISIBLE_DEVICES"
]
for
k
,
v
in
envs
.
items
():
for
k
,
v
in
envs
.
items
():
if
k
in
os
.
environ
and
os
.
environ
[
k
]
!=
v
:
if
k
in
os
.
environ
and
os
.
environ
[
k
]
!=
v
:
logger
.
warning
(
logger
.
warning
(
...
...
vllm/worker/worker_base.py
View file @
b6c16cf8
...
@@ -11,7 +11,7 @@ from vllm.logger import init_logger
...
@@ -11,7 +11,7 @@ from vllm.logger import init_logger
from
vllm.lora.request
import
LoRARequest
from
vllm.lora.request
import
LoRARequest
from
vllm.sequence
import
(
ExecuteModelRequest
,
IntermediateTensors
,
from
vllm.sequence
import
(
ExecuteModelRequest
,
IntermediateTensors
,
SamplerOutput
)
SamplerOutput
)
from
vllm.utils
import
(
enable_trace_function_call_for_thread
,
is_hip
,
from
vllm.utils
import
(
enable_trace_function_call_for_thread
,
update_environment_variables
)
update_environment_variables
)
from
vllm.worker.model_runner_base
import
ModelRunnerBase
,
ModelRunnerInputBase
from
vllm.worker.model_runner_base
import
ModelRunnerBase
,
ModelRunnerInputBase
...
@@ -309,14 +309,6 @@ class WorkerWrapperBase:
...
@@ -309,14 +309,6 @@ class WorkerWrapperBase:
# overwriting CUDA_VISIBLE_DEVICES is desired behavior
# overwriting CUDA_VISIBLE_DEVICES is desired behavior
# suppress the warning in `update_environment_variables`
# suppress the warning in `update_environment_variables`
del
os
.
environ
[
key
]
del
os
.
environ
[
key
]
if
is_hip
():
hip_env_var
=
"HIP_VISIBLE_DEVICES"
if
hip_env_var
in
os
.
environ
:
logger
.
warning
(
"Ignoring pre-set environment variable `%s=%s` as "
"%s has also been set, which takes precedence."
,
hip_env_var
,
os
.
environ
[
hip_env_var
],
key
)
os
.
environ
.
pop
(
hip_env_var
,
None
)
update_environment_variables
(
envs
)
update_environment_variables
(
envs
)
def
init_worker
(
self
,
*
args
,
**
kwargs
):
def
init_worker
(
self
,
*
args
,
**
kwargs
):
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment