Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
bf3ffb61
Unverified
Commit
bf3ffb61
authored
Nov 14, 2025
by
Benjamin Chislett
Committed by
GitHub
Nov 14, 2025
Browse files
[Bugfix] Fix ChunkedLocalAttention CUDA Graph setting (#28739)
Signed-off-by: Benjamin Chislett <bchislett@nvidia.com>
parent
e5c78956
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing 1 changed file with 16 additions and 3 deletions
+16
-3
vllm/attention/layers/chunked_local_attention.py
vllm/attention/layers/chunked_local_attention.py
+16
-3
No files found.
vllm/attention/layers/chunked_local_attention.py
View file @
bf3ffb61
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
import
functools
from
typing
import
ClassVar
import
torch
...
...
@@ -12,11 +11,16 @@ from vllm.config.vllm import VllmConfig
from
vllm.model_executor.layers.quantization
import
QuantizationConfig
from
vllm.v1.attention.backends.utils
import
(
AttentionCGSupport
,
AttentionMetadataBuilder
,
CommonAttentionMetadata
,
make_local_attention_virtual_batches
,
subclass_attention_backend
,
)
from
vllm.v1.kv_cache_interface
import
ChunkedLocalAttentionSpec
,
KVCacheSpec
from
vllm.v1.kv_cache_interface
import
(
AttentionSpec
,
ChunkedLocalAttentionSpec
,
KVCacheSpec
,
)
from
..layer
import
Attention
...
...
@@ -30,9 +34,18 @@ def create_chunked_local_attention_backend(
prefix
=
f
"ChunkedLocalAttention_
{
attention_chunk_size
}
_
{
block_size
}
_"
underlying_builder
=
underlying_attn_backend
.
get_builder_cls
()
assert
issubclass
(
underlying_builder
,
AttentionMetadataBuilder
)
class
ChunkedLocalAttentionBuilder
(
underlying_builder
):
# type: ignore
_cudagraph_support
:
ClassVar
[
AttentionCGSupport
]
=
AttentionCGSupport
.
NEVER
@classmethod
def get_cudagraph_support(
    cls: type["AttentionMetadataBuilder"],
    vllm_config: VllmConfig,
    kv_cache_spec: AttentionSpec,
) -> AttentionCGSupport:
    """Return the CUDA graph support level for this builder.

    This is an explicit override in case the underlying builder
    specialized this getter. Both arguments are ignored.

    Args:
        vllm_config: Unused.
        kv_cache_spec: Unused.

    Returns:
        Always ``AttentionCGSupport.NEVER``.
    """
    # @override omitted only because of mypy limitation due to type variable.
    return AttentionCGSupport.NEVER
def
build
(
self
,
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment