Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
d8e38d49
Unverified
Commit
d8e38d49
authored
Jan 05, 2026
by
Or Ozeri
Committed by
GitHub
Jan 05, 2026
Browse files
Triton Attention: Support cross-layers blocks (#30687)
Signed-off-by:
Or Ozeri
<
oro@il.ibm.com
>
parent
21156ff1
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
14 additions
and
3 deletions
+14
-3
tests/v1/kv_offload/test_cpu_offloading.py
tests/v1/kv_offload/test_cpu_offloading.py
+1
-3
vllm/v1/attention/backends/triton_attn.py
vllm/v1/attention/backends/triton_attn.py
+13
-0
No files found.
tests/v1/kv_offload/test_cpu_offloading.py
View file @
d8e38d49
...
...
@@ -15,12 +15,10 @@ from vllm.distributed.kv_events import BlockStored, KVEventBatch
from
vllm.platforms
import
current_platform
CPU_BLOCK_SIZES = [48]

# NOTE(review): the first ATTN_BACKENDS assignment is immediately superseded
# by the second one; it looks like the pre-change line of the captured diff.
# Confirm against the real file before relying on either.
ATTN_BACKENDS = ["FLASH_ATTN"]
ATTN_BACKENDS = ["FLASH_ATTN", "TRITON_ATTN"]

# Adjust the backend list for the platform reported by `current_platform`.
if current_platform.is_cuda():
    ATTN_BACKENDS.append("FLASHINFER")
elif current_platform.is_rocm():
    ATTN_BACKENDS = ["TRITON_ATTN"]
class
MockSubscriber
:
...
...
vllm/v1/attention/backends/triton_attn.py
View file @
d8e38d49
...
...
@@ -290,6 +290,19 @@ class TritonAttentionBackend(AttentionBackend):
raise
ValueError
(
"Block size must be a multiple of 16."
)
return
(
num_blocks
,
2
,
block_size
,
num_kv_heads
,
head_size
)
@staticmethod
def get_kv_cache_stride_order(
    include_num_layers_dimension: bool = False,
) -> tuple[int, ...]:
    """Return the axis permutation for the KV cache memory layout.

    The returned `stride_order` is the permutation that takes the shape
    reported by `get_kv_cache_shape` to the memory layout actually wanted.

    Args:
        include_num_layers_dimension: When true, return a six-axis
            permutation that swaps the first two axes; otherwise return
            the five-axis identity permutation.

    Returns:
        A tuple of axis indices describing the permutation.
    """
    if not include_num_layers_dimension:
        # (num_blocks, 2, block_size, num_kv_heads, head_size)
        return (0, 1, 2, 3, 4)

    # (num_blocks, num_layers, 2, block_size, num_kv_heads, head_size)
    return (1, 0, 2, 3, 4, 5)
@staticmethod
def use_cascade_attention(*args, **kwargs) -> bool:
    """Report whether cascade attention should be used.

    Any positional or keyword arguments are accepted and ignored; the
    answer is unconditionally ``False``.
    """
    return False
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment