[BugFix][kv_offload]: Fix kernel block size detection (#35125)

Signed-off-by: Or Ozeri <oro@il.ibm.com>

[BugFix][kv_offload]: Fix kernel block size detection (#35125)
Signed-off-by: Or Ozeri <oro@il.ibm.com>
f2ad952f · Or Ozeri · GitHub · 9e2cabdf · f2ad952f
Unverified commit f2ad952f, authored Feb 26, 2026 by Or Ozeri; committed by GitHub on Feb 26, 2026.
Hide whitespace changes
Inline Side-by-side

Showing 1 changed file with 14 additions and 10 deletions

vllm/v1/kv_offload/worker/cpu_gpu.py (+14 -10)

No files found.
--- a/vllm/v1/kv_offload/worker/cpu_gpu.py
+++ b/vllm/v1/kv_offload/worker/cpu_gpu.py
@@ -259,16 +259,20 @@ class CpuGpuOffloadingHandlers:
                assert gpu_shape[0] == 2
                split_k_and_v = True
-            try:
+            if has_layers_dim:
-                kv_cache_stride_order = attn_backend.get_kv_cache_stride_order(
+                # in the cross layers case, the registered kv cache tensor
-                    include_num_layers_dimension=has_layers_dim
+                # shape matches the physical layout, whereas test_shape
-                )
+                # is the logical layout.
-                assert len(kv_cache_stride_order) == len(gpu_shape)
+                # To match them, we need to permute test_shape
-            except (AttributeError, NotImplementedError):
+                try:
-                kv_cache_stride_order = tuple(range(len(gpu_shape)))
+                    kv_cache_stride_order = attn_backend.get_kv_cache_stride_order(
+                        include_num_layers_dimension=has_layers_dim
-            # permute test_shape according to stride_order
+                    )
-            test_shape = tuple(test_shape[i] for i in kv_cache_stride_order)
+                    assert len(kv_cache_stride_order) == len(gpu_shape)
+                except (AttributeError, NotImplementedError):
+                    kv_cache_stride_order = tuple(range(len(gpu_shape)))
+                test_shape = tuple(test_shape[i] for i in kv_cache_stride_order)
            # find block_size (16) dimension index
            block_size_idx = test_shape.index(16)