Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
4134312b
Unverified
Commit
4134312b
authored
Oct 01, 2025
by
Lucas Wilkinson
Committed by
GitHub
Oct 01, 2025
Browse files
[BugFix] ChunkedLocalAttention is currently not CG compatible (#26034)
Signed-off-by:
Lucas Wilkinson
<
lwilkins@redhat.com
>
parent
da554f93
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
5 additions
and
3 deletions
+5
-3
vllm/attention/layers/chunked_local_attention.py
vllm/attention/layers/chunked_local_attention.py
+5
-3
No files found.
vllm/attention/layers/chunked_local_attention.py
View file @
4134312b
# SPDX-License-Identifier: Apache-2.0
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
import
functools
import
functools
from
typing
import
List
,
Optional
from
typing
import
ClassVar
,
List
,
Optional
import
torch
import
torch
...
@@ -12,8 +12,8 @@ from vllm.attention.selector import get_attn_backend
...
@@ -12,8 +12,8 @@ from vllm.attention.selector import get_attn_backend
from
vllm.config
import
CacheConfig
from
vllm.config
import
CacheConfig
from
vllm.model_executor.layers.quantization
import
QuantizationConfig
from
vllm.model_executor.layers.quantization
import
QuantizationConfig
from
vllm.v1.attention.backends.utils
import
(
from
vllm.v1.attention.backends.utils
import
(
Common
Attention
Metadata
,
make_local_attention_virtual_batches
,
Attention
CGSupport
,
CommonAttentionMetadata
,
subclass_attention_backend
)
make_local_attention_virtual_batches
,
subclass_attention_backend
)
from
..layer
import
Attention
from
..layer
import
Attention
...
@@ -29,6 +29,8 @@ def create_chunked_local_attention_backend(
...
@@ -29,6 +29,8 @@ def create_chunked_local_attention_backend(
underlying_builder
=
underlying_attn_backend
.
get_builder_cls
()
underlying_builder
=
underlying_attn_backend
.
get_builder_cls
()
class
ChunkedLocalAttentionBuilder
(
underlying_builder
):
# type: ignore
class
ChunkedLocalAttentionBuilder
(
underlying_builder
):
# type: ignore
cudagraph_support
:
ClassVar
[
AttentionCGSupport
]
=
\
AttentionCGSupport
.
NEVER
def
build
(
self
,
def
build
(
self
,
common_prefix_len
:
int
,
common_prefix_len
:
int
,
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment