Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
e969a169
Unverified
Commit
e969a169
authored
Feb 06, 2026
by
Xinyu Chen
Committed by
GitHub
Feb 06, 2026
Browse files
support view_from_cpu_tensor on XPU (#33868)
Signed-off-by:
Xinyu Chen
<
xinyu1.chen@intel.com
>
parent
6d8d34be
Changes
4
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
13 additions
and
9 deletions
+13
-9
tests/kernels/core/test_uva.py
tests/kernels/core/test_uva.py
+3
-3
vllm/model_executor/models/utils.py
vllm/model_executor/models/utils.py
+2
-2
vllm/utils/torch_utils.py
vllm/utils/torch_utils.py
+6
-2
vllm/v1/worker/gpu/buffer_utils.py
vllm/v1/worker/gpu/buffer_utils.py
+2
-2
No files found.
tests/kernels/core/test_uva.py
View file @
e969a169
...
...
@@ -4,7 +4,7 @@ import pytest
import
torch
from
vllm.utils.platform_utils
import
is_uva_available
from
vllm.utils.torch_utils
import
get_
cuda
_view_from_cpu_tensor
from
vllm.utils.torch_utils
import
get_
accelerator
_view_from_cpu_tensor
CUDA_DEVICES
=
[
f
"cuda:
{
i
}
"
for
i
in
range
(
1
if
torch
.
cuda
.
device_count
()
==
1
else
2
)]
...
...
@@ -14,7 +14,7 @@ CUDA_DEVICES = [f"cuda:{i}" for i in range(1 if torch.cuda.device_count() == 1 e
def
test_cpu_write
(
device
):
torch
.
set_default_device
(
device
)
cpu_tensor
=
torch
.
zeros
(
10
,
10
,
device
=
"cpu"
,
pin_memory
=
True
,
dtype
=
torch
.
int32
)
cuda_view
=
get_
cuda
_view_from_cpu_tensor
(
cpu_tensor
)
cuda_view
=
get_
accelerator
_view_from_cpu_tensor
(
cpu_tensor
)
assert
cuda_view
.
device
.
type
==
"cuda"
assert
cuda_view
[
0
,
0
]
==
0
...
...
@@ -36,7 +36,7 @@ def test_cpu_write(device):
def
test_gpu_write
(
device
):
torch
.
set_default_device
(
device
)
cpu_tensor
=
torch
.
zeros
(
10
,
10
,
device
=
"cpu"
,
pin_memory
=
True
,
dtype
=
torch
.
int32
)
cuda_view
=
get_
cuda
_view_from_cpu_tensor
(
cpu_tensor
)
cuda_view
=
get_
accelerator
_view_from_cpu_tensor
(
cpu_tensor
)
assert
cuda_view
.
device
.
type
==
"cuda"
assert
cuda_view
[
0
,
0
]
==
0
...
...
vllm/model_executor/models/utils.py
View file @
e969a169
...
...
@@ -36,7 +36,7 @@ from vllm.utils.platform_utils import (
)
from
vllm.utils.torch_utils
import
(
direct_register_custom_op
,
get_
cuda
_view_from_cpu_tensor
,
get_
accelerator
_view_from_cpu_tensor
,
)
logger
=
init_logger
(
__name__
)
...
...
@@ -663,7 +663,7 @@ def maybe_offload_to_cpu(module: torch.nn.Module) -> torch.nn.Module:
else
:
# keep the cpu data alive
p
.
_vllm_offloaded_cpu_data
=
cpu_data
p
.
data
=
get_
cuda
_view_from_cpu_tensor
(
cpu_data
)
p
.
data
=
get_
accelerator
_view_from_cpu_tensor
(
cpu_data
)
_CPU_OFFLOAD_BYTES
+=
p
.
data
.
numel
()
*
p
.
data
.
element_size
()
offloaded_parameters
=
True
...
...
vllm/utils/torch_utils.py
View file @
e969a169
...
...
@@ -674,11 +674,15 @@ def weak_ref_tensors(
raise
ValueError
(
"Invalid type for tensors"
)
def
get_
cuda
_view_from_cpu_tensor
(
cpu_tensor
:
torch
.
Tensor
)
->
torch
.
Tensor
:
def
get_
accelerator
_view_from_cpu_tensor
(
cpu_tensor
:
torch
.
Tensor
)
->
torch
.
Tensor
:
"""
Get a
CUDA
view of a CPU tensor using Unified Virtual Addressing (UVA).
Get a
n accelerator
view of a CPU tensor using Unified Virtual Addressing (UVA).
"""
assert
cpu_tensor
.
is_pinned
(),
"CPU tensor must be pinned"
from
vllm.platforms
import
current_platform
if
current_platform
.
is_xpu
():
return
torch
.
ops
.
_C
.
get_xpu_view_from_cpu_tensor
(
cpu_tensor
)
return
torch
.
ops
.
_C
.
get_cuda_view_from_cpu_tensor
(
cpu_tensor
)
...
...
vllm/v1/worker/gpu/buffer_utils.py
View file @
e969a169
...
...
@@ -9,7 +9,7 @@ import torch
from
vllm.triton_utils
import
tl
,
triton
from
vllm.utils.math_utils
import
next_power_of_2
from
vllm.utils.platform_utils
import
is_uva_available
from
vllm.utils.torch_utils
import
get_
cuda
_view_from_cpu_tensor
from
vllm.utils.torch_utils
import
get_
accelerator
_view_from_cpu_tensor
def
async_copy_to_gpu
(
...
...
@@ -38,7 +38,7 @@ class UvaBuffer:
raise
RuntimeError
(
"UVA is not available"
)
self
.
cpu
=
torch
.
zeros
(
size
,
dtype
=
dtype
,
device
=
"cpu"
,
pin_memory
=
True
)
self
.
np
=
self
.
cpu
.
numpy
()
self
.
uva
=
get_
cuda
_view_from_cpu_tensor
(
self
.
cpu
)
self
.
uva
=
get_
accelerator
_view_from_cpu_tensor
(
self
.
cpu
)
class
UvaBufferPool
:
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment