Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
0b407479
Unverified
Commit
0b407479
authored
Jul 09, 2025
by
Kunshang Ji
Committed by
GitHub
Jul 09, 2025
Browse files
[misc]refactor `Platform.set_device` method (#20262)
Signed-off-by:
Kunshang Ji
<
kunshang.ji@intel.com
>
parent
5eaf5700
Changes
9
Show whitespace changes
Inline
Side-by-side
Showing
9 changed files
with
43 additions
and
4 deletions
+43
-4
vllm/platforms/cpu.py
vllm/platforms/cpu.py
+7
-0
vllm/platforms/cuda.py
vllm/platforms/cuda.py
+1
-1
vllm/platforms/hpu.py
vllm/platforms/hpu.py
+7
-0
vllm/platforms/interface.py
vllm/platforms/interface.py
+1
-1
vllm/platforms/rocm.py
vllm/platforms/rocm.py
+11
-0
vllm/platforms/tpu.py
vllm/platforms/tpu.py
+7
-0
vllm/platforms/xpu.py
vllm/platforms/xpu.py
+7
-0
vllm/v1/worker/gpu_worker.py
vllm/v1/worker/gpu_worker.py
+1
-1
vllm/v1/worker/xpu_worker.py
vllm/v1/worker/xpu_worker.py
+1
-1
No files found.
vllm/platforms/cpu.py
View file @
0b407479
...
...
@@ -75,6 +75,13 @@ class CpuPlatform(Platform):
def
get_device_total_memory
(
cls
,
device_id
:
int
=
0
)
->
int
:
return
psutil
.
virtual_memory
().
total
@
classmethod
def
set_device
(
cls
,
device
:
torch
.
device
)
->
None
:
"""
Set the device for the current platform.
"""
torch
.
cpu
.
set_device
(
device
)
@
classmethod
def
is_async_output_supported
(
cls
,
enforce_eager
:
Optional
[
bool
])
->
bool
:
return
False
...
...
vllm/platforms/cuda.py
View file @
0b407479
...
...
@@ -77,7 +77,7 @@ class CudaPlatformBase(Platform):
"""
Set the device for the current platform.
"""
super
()
.
set_device
(
device
)
torch
.
cuda
.
set_device
(
device
)
# With this trick we can force the device to be set eagerly
# see https://github.com/pytorch/pytorch/issues/155668
# for why and when it is needed
...
...
vllm/platforms/hpu.py
View file @
0b407479
...
...
@@ -45,6 +45,13 @@ class HpuPlatform(Platform):
def
inference_mode
(
cls
):
return
torch
.
no_grad
()
@
classmethod
def
set_device
(
cls
,
device
:
torch
.
device
)
->
None
:
"""
Set the device for the current platform.
"""
torch
.
hpu
.
set_device
(
device
)
@
classmethod
def
check_and_update_config
(
cls
,
vllm_config
:
VllmConfig
)
->
None
:
...
...
vllm/platforms/interface.py
View file @
0b407479
...
...
@@ -305,7 +305,7 @@ class Platform:
"""
Set the device for the current platform.
"""
torch
.
cuda
.
set_device
(
device
)
raise
NotImplementedError
@
classmethod
def
pre_register_and_update
(
cls
,
...
...
vllm/platforms/rocm.py
View file @
0b407479
...
...
@@ -241,6 +241,17 @@ class RocmPlatform(Platform):
logger
.
info
(
"Using ROCmFlashAttention backend."
)
return
"vllm.attention.backends.rocm_flash_attn.ROCmFlashAttentionBackend"
# noqa: E501
@
classmethod
def
set_device
(
cls
,
device
:
torch
.
device
)
->
None
:
"""
Set the device for the current platform.
"""
torch
.
cuda
.
set_device
(
device
)
# With this trick we can force the device to be set eagerly
# see https://github.com/pytorch/pytorch/issues/155668
# for why and when it is needed
_
=
torch
.
zeros
(
1
,
device
=
device
)
@
classmethod
@
lru_cache
(
maxsize
=
8
)
def
get_device_capability
(
cls
,
...
...
vllm/platforms/tpu.py
View file @
0b407479
...
...
@@ -55,6 +55,13 @@ class TpuPlatform(Platform):
logger
.
info
(
"Using Pallas V1 backend."
)
return
"vllm.v1.attention.backends.pallas.PallasAttentionBackend"
@
classmethod
def
set_device
(
cls
,
device
:
torch
.
device
)
->
None
:
"""
Set the device for the current platform.
"""
torch
.
tpu
.
set_device
(
device
)
@
classmethod
def
get_device_name
(
cls
,
device_id
:
int
=
0
)
->
str
:
chip_type
,
_
=
device
.
get_local_chips
()
...
...
vllm/platforms/xpu.py
View file @
0b407479
...
...
@@ -45,6 +45,13 @@ class XPUPlatform(Platform):
logger
.
info
(
"Using Flash Attention backend on V1 engine."
)
return
"vllm.v1.attention.backends.flash_attn.FlashAttentionBackend"
@
classmethod
def
set_device
(
cls
,
device
:
torch
.
device
)
->
None
:
"""
Set the device for the current platform.
"""
torch
.
xpu
.
set_device
(
device
)
@
classmethod
def
get_device_capability
(
cls
,
...
...
vllm/v1/worker/gpu_worker.py
View file @
0b407479
...
...
@@ -130,7 +130,7 @@ class Worker(WorkerBase):
# This env var set by Ray causes exceptions with graph building.
os
.
environ
.
pop
(
"NCCL_ASYNC_ERROR_HANDLING"
,
None
)
self
.
device
=
torch
.
device
(
f
"cuda:
{
self
.
local_rank
}
"
)
torch
.
cuda
.
set_device
(
self
.
device
)
current_platform
.
set_device
(
self
.
device
)
_check_if_gpu_supports_dtype
(
self
.
model_config
.
dtype
)
gc
.
collect
()
...
...
vllm/v1/worker/xpu_worker.py
View file @
0b407479
...
...
@@ -132,7 +132,7 @@ class XPUWorker(Worker):
if
self
.
device_config
.
device
.
type
==
"xpu"
and
current_platform
.
is_xpu
(
):
self
.
device
=
torch
.
device
(
f
"xpu:
{
self
.
local_rank
}
"
)
torch
.
xpu
.
set_device
(
self
.
device
)
current_platform
.
set_device
(
self
.
device
)
torch
.
xpu
.
empty_cache
()
self
.
init_gpu_memory
=
torch
.
xpu
.
get_device_properties
(
self
.
local_rank
).
total_memory
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment