Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
c3e29786
Unverified
Commit
c3e29786
authored
Nov 18, 2025
by
Chendi.Xue
Committed by
GitHub
Nov 18, 2025
Browse files
[NIXL] fix cpu PD after physical <> logical block_size PR (#28904)
Signed-off-by:
Chendi Xue
<
chendi.xue@intel.com
>
parent
e4bb2684
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
17 additions
and
5 deletions
+17
-5
tests/v1/kv_connector/nixl_integration/run_accuracy_test.sh
tests/v1/kv_connector/nixl_integration/run_accuracy_test.sh
+7
-2
tools/install_nixl_from_source_ubuntu.py
tools/install_nixl_from_source_ubuntu.py
+1
-0
vllm/distributed/kv_transfer/kv_connector/v1/nixl_connector.py
...distributed/kv_transfer/kv_connector/v1/nixl_connector.py
+9
-3
No files found.
tests/v1/kv_connector/nixl_integration/run_accuracy_test.sh
View file @
c3e29786
...
...
@@ -55,7 +55,7 @@ DECODE_BLOCK_SIZE=${DECODE_BLOCK_SIZE:-128}
# Find the git repository root directory
GIT_ROOT
=
$(
git rev-parse
--show-toplevel
)
SMI_BIN
=
$(
which nvidia-smi
||
which rocm-smi
)
SMI_BIN
=
$(
which nvidia-smi
||
which rocm-smi
||
echo
""
)
# Trap the SIGINT signal (triggered by Ctrl+C)
trap
'kill $(jobs -pr)'
SIGINT SIGTERM EXIT
...
...
@@ -91,8 +91,13 @@ get_model_args() {
get_num_gpus
()
{
if
[[
"
$SMI_BIN
"
==
*
"nvidia"
*
]]
;
then
echo
"
$(
$SMI_BIN
--query-gpu
=
name
--format
=
csv,noheader |
wc
-l
)
"
el
se
el
if
[[
"
$SMI_BIN
"
==
*
"rocm"
*
]]
;
then
echo
"
$(
$SMI_BIN
-l
|
grep
GPU |
wc
-l
)
"
else
# works for non-cuda platforms,
# assuming at least 1 device and
# let the system decide which card to use
echo
"1"
fi
}
...
...
tools/install_nixl_from_source_ubuntu.py
View file @
c3e29786
...
...
@@ -95,6 +95,7 @@ def install_system_dependencies():
"meson"
,
"libtool"
,
"libtool-bin"
,
"pkg-config"
,
]
run_command
([
"apt-get"
,
"update"
])
run_command
([
"apt-get"
,
"install"
,
"-y"
]
+
apt_packages
)
...
...
vllm/distributed/kv_transfer/kv_connector/v1/nixl_connector.py
View file @
c3e29786
...
...
@@ -1161,6 +1161,14 @@ class NixlConnectorWorker:
# to better exploit the memory layout (ie num_blocks is the first dim).
split_k_and_v
=
self
.
kv_topo
.
split_k_and_v
tensor_size_bytes
=
None
# TODO (NickLucche): Get kernel_block_size in a cleaner way
# NHD default "view" for non-MLA cache
if
self
.
device_type
==
"cpu"
:
block_size_position
=
-
2
else
:
block_size_position
=
-
2
if
self
.
use_mla
else
-
3
# Enable different block lengths for different layers when MLA is used.
self
.
block_len_per_layer
=
list
[
int
]()
self
.
slot_size_per_layer
=
list
[
int
]()
# HD bytes in kv terms
...
...
@@ -1175,9 +1183,7 @@ class NixlConnectorWorker:
if
base_addr
in
seen_base_addresses
:
continue
# TODO (NickLucche): Get kernel_block_size in a cleaner way
# NHD default "view" for non-MLA cache
kernel_block_size
=
cache
.
shape
[
-
2
]
if
self
.
use_mla
else
cache
.
shape
[
-
3
]
kernel_block_size
=
cache
.
shape
[
block_size_position
]
if
self
.
block_size
!=
kernel_block_size
:
logger
.
info_once
(
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment