Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
7c7adf81
Unverified
Commit
7c7adf81
authored
Feb 17, 2025
by
Divakar Verma
Committed by
GitHub
Feb 18, 2025
Browse files
[ROCm] fix get_device_name for rocm (#13438)
Signed-off-by:
Divakar Verma
<
divakar.verma@amd.com
>
parent
67ef8f66
Changes
1
Show whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
43 additions
and
6 deletions
+43
-6
vllm/platforms/rocm.py
vllm/platforms/rocm.py
+43
-6
No files found.
vllm/platforms/rocm.py
View file @
7c7adf81
# SPDX-License-Identifier: Apache-2.0
from
functools
import
lru_cache
import
os
from
functools
import
lru_cache
,
wraps
from
typing
import
TYPE_CHECKING
,
Dict
,
List
,
Optional
import
torch
from
amdsmi
import
(
amdsmi_get_gpu_asic_info
,
amdsmi_get_processor_handles
,
amdsmi_init
,
amdsmi_shut_down
)
import
vllm.envs
as
envs
from
vllm.logger
import
init_logger
...
...
@@ -53,6 +56,41 @@ _ROCM_PARTIALLY_SUPPORTED_MODELS: Dict[str, str] = {
"by setting `VLLM_USE_TRITON_FLASH_ATTN=0`"
)
}
# Prevent use of clashing `{CUDA/HIP}_VISIBLE_DEVICES`.
# When HIP_VISIBLE_DEVICES is set, CUDA_VISIBLE_DEVICES must either carry the
# same value or be unset/empty, in which case it is filled in from the HIP
# variable so that both describe the same set of devices.
if "HIP_VISIBLE_DEVICES" in os.environ:
    val = os.environ["HIP_VISIBLE_DEVICES"]
    if cuda_val := os.environ.get("CUDA_VISIBLE_DEVICES", None):
        if val != cuda_val:
            # Raised explicitly instead of via `assert` so the check is not
            # stripped under `python -O`; the exception type is unchanged.
            raise AssertionError(
                "HIP_VISIBLE_DEVICES and CUDA_VISIBLE_DEVICES are both set "
                f"but differ: {val!r} != {cuda_val!r}")
    else:
        os.environ["CUDA_VISIBLE_DEVICES"] = val
# AMDSMI utils
# Note that AMDSMI is not affected by `{CUDA/HIP}_VISIBLE_DEVICES`;
# all the related functions work on real physical device IDs.
# The major benefit of using AMDSMI is that it will not initialize the CUDA/HIP context.
def with_amdsmi_context(fn):
    """Decorate ``fn`` so that each call runs between AMDSMI init and shutdown.

    ``amdsmi_init()`` is called before ``fn``; ``amdsmi_shut_down()`` is
    called afterwards whether ``fn`` returns normally or raises. The wrapped
    function's return value (or exception) is passed through unchanged.
    """

    @wraps(fn)
    def managed(*args, **kwargs):
        # Initialise outside the `try`: if init itself fails there is
        # nothing to shut down.
        amdsmi_init()
        try:
            result = fn(*args, **kwargs)
        finally:
            amdsmi_shut_down()
        return result

    return managed
def device_id_to_physical_device_id(device_id: int) -> int:
    """Map a logical device index to the physical device id.

    When ``CUDA_VISIBLE_DEVICES`` is set, it is read as a comma-separated
    list and the entry at position ``device_id`` is returned as an ``int``.
    When it is not set, ``device_id`` is returned unchanged.

    Args:
        device_id: Index into the list of visible devices.

    Returns:
        The physical device id corresponding to ``device_id``.

    Raises:
        ValueError: If ``CUDA_VISIBLE_DEVICES`` is set but empty/blank, or
            the selected entry is not an integer.
        IndexError: If ``device_id`` is outside the visible device list.
    """
    visible = os.environ.get("CUDA_VISIBLE_DEVICES")
    if visible is None:
        return device_id

    if not visible.strip():
        # Without this guard the failure surfaces as the cryptic
        # "invalid literal for int() with base 10: ''".
        raise ValueError(
            "CUDA_VISIBLE_DEVICES is set to an empty string, so no device "
            "is visible; unset it or list the device ids to use.")

    device_ids = visible.split(",")
    try:
        physical_device_id = device_ids[device_id]
    except IndexError as err:
        raise IndexError(
            f"device_id {device_id} is out of range for "
            f"CUDA_VISIBLE_DEVICES={visible!r}") from err
    return int(physical_device_id)
class
RocmPlatform
(
Platform
):
_enum
=
PlatformEnum
.
ROCM
...
...
@@ -96,13 +134,12 @@ class RocmPlatform(Platform):
return
DeviceCapability
(
major
=
major
,
minor
=
minor
)
@classmethod
@with_amdsmi_context
@lru_cache(maxsize=8)
def get_device_name(cls, device_id: int = 0) -> str:
    """Return the market name AMDSMI reports for a device.

    Args:
        device_id: Logical device index; it is translated with
            ``device_id_to_physical_device_id`` before AMDSMI is queried.

    Returns:
        The ``"market_name"`` entry of the device's AMDSMI ASIC info.
    """
    # NOTE: When using V1 this function is called when overriding the
    # engine args. Calling torch.cuda.get_device_name(device_id) here
    # would result in the ROCm context being initialized before other
    # processes can be created, so the name is read through AMDSMI instead.
    physical_device_id = device_id_to_physical_device_id(device_id)
    handle = amdsmi_get_processor_handles()[physical_device_id]
    return amdsmi_get_gpu_asic_info(handle)["market_name"]
@
classmethod
def
get_device_total_memory
(
cls
,
device_id
:
int
=
0
)
->
int
:
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment