Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
460d02a4
Unverified
Commit
460d02a4
authored
Nov 21, 2025
by
Chendi.Xue
Committed by
GitHub
Nov 21, 2025
Browse files
[NIXL] Fix after virtual block_size for host_buffer with heter kv_layout (#29122)
Signed-off-by:
Chendi Xue
<
chendi.xue@intel.com
>
parent
b4c8fbaa
Changes
2
Show whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
13 additions
and
9 deletions
+13
-9
vllm/distributed/kv_transfer/kv_connector/v1/nixl_connector.py
...distributed/kv_transfer/kv_connector/v1/nixl_connector.py
+13
-1
vllm/platforms/xpu.py
vllm/platforms/xpu.py
+0
-8
No files found.
vllm/distributed/kv_transfer/kv_connector/v1/nixl_connector.py
View file @
460d02a4
...
@@ -1042,10 +1042,12 @@ class NixlConnectorWorker:
...
@@ -1042,10 +1042,12 @@ class NixlConnectorWorker:
NOT directly supported by NIXL (e.g., tpu)
NOT directly supported by NIXL (e.g., tpu)
"""
"""
xfer_buffers
:
dict
[
str
,
torch
.
Tensor
]
=
{}
xfer_buffers
:
dict
[
str
,
torch
.
Tensor
]
=
{}
inv_order
=
[
0
,
1
,
3
,
2
,
4
]
try
:
try
:
for
layer_name
,
kv_cache
in
kv_caches
.
items
():
for
layer_name
,
kv_cache
in
kv_caches
.
items
():
kv_shape
=
kv_cache
.
shape
kv_shape
=
kv_cache
.
shape
kv_dtype
=
kv_cache
.
dtype
kv_dtype
=
kv_cache
.
dtype
permute_shape
=
False
if
(
if
(
self
.
kv_cache_layout
==
"NHD"
self
.
kv_cache_layout
==
"NHD"
and
self
.
vllm_config
.
kv_transfer_config
is
not
None
and
self
.
vllm_config
.
kv_transfer_config
is
not
None
...
@@ -1059,10 +1061,20 @@ class NixlConnectorWorker:
...
@@ -1059,10 +1061,20 @@ class NixlConnectorWorker:
# Since NHD will not support Decode/Prefill TP_ratio > 1,
# Since NHD will not support Decode/Prefill TP_ratio > 1,
# we can leverage host_buffer for permute
# we can leverage host_buffer for permute
self
.
host_buffer_kv_cache_layout
=
"HND"
self
.
host_buffer_kv_cache_layout
=
"HND"
kv_shape
=
tuple
(
kv_shape
[
i
]
for
i
in
[
0
,
1
,
3
,
2
,
4
])
kv_shape
=
(
tuple
(
kv_shape
[
i
]
for
i
in
inv_order
)
if
not
self
.
use_mla
else
kv_shape
)
permute_shape
=
not
self
.
use_mla
xfer_buffers
[
layer_name
]
=
torch
.
empty
(
xfer_buffers
[
layer_name
]
=
torch
.
empty
(
kv_shape
,
dtype
=
kv_dtype
,
device
=
"cpu"
kv_shape
,
dtype
=
kv_dtype
,
device
=
"cpu"
)
)
if
permute_shape
:
xfer_buffers
[
layer_name
]
=
xfer_buffers
[
layer_name
].
permute
(
inv_order
)
except
MemoryError
as
e
:
except
MemoryError
as
e
:
logger
.
error
(
"NIXLConnectorWorker gets %s."
,
e
)
logger
.
error
(
"NIXLConnectorWorker gets %s."
,
e
)
raise
raise
...
...
vllm/platforms/xpu.py
View file @
460d02a4
...
@@ -251,10 +251,6 @@ class XPUPlatform(Platform):
...
@@ -251,10 +251,6 @@ class XPUPlatform(Platform):
)
->
None
:
)
->
None
:
"""Copy blocks from src_cache to dst_cache on XPU."""
"""Copy blocks from src_cache to dst_cache on XPU."""
_src_cache
=
src_cache
[:,
src_block_indices
]
_src_cache
=
src_cache
[:,
src_block_indices
]
if
_src_cache
.
shape
[
2
:]
!=
dst_cache
.
shape
[
2
:]:
# To support TP_ratio, HOST KV might be initiated with HND
# while XPU device KV is with NHD
_src_cache
=
_src_cache
.
permute
(
0
,
1
,
3
,
2
,
4
)
dst_cache
[:,
dst_block_indices
]
=
_src_cache
.
to
(
dst_cache
.
device
)
dst_cache
[:,
dst_block_indices
]
=
_src_cache
.
to
(
dst_cache
.
device
)
@
classmethod
@
classmethod
...
@@ -267,8 +263,4 @@ class XPUPlatform(Platform):
...
@@ -267,8 +263,4 @@ class XPUPlatform(Platform):
)
->
None
:
)
->
None
:
"""Copy blocks from XPU to host (CPU)."""
"""Copy blocks from XPU to host (CPU)."""
_src_cache
=
src_cache
[:,
src_block_indices
]
_src_cache
=
src_cache
[:,
src_block_indices
]
if
_src_cache
.
shape
[
2
:]
!=
dst_cache
.
shape
[
2
:]:
# XPU device KV is with NHD while HOST KV
# might be initiated with HND for TP_ratio support
_src_cache
=
_src_cache
.
permute
(
0
,
1
,
3
,
2
,
4
)
dst_cache
[:,
dst_block_indices
]
=
_src_cache
.
cpu
()
dst_cache
[:,
dst_block_indices
]
=
_src_cache
.
cpu
()
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment