[BugFix] ChunkedLocalAttention is currently not CG compatible (#26034)

Signed-off-by: Lucas Wilkinson <lwilkins@redhat.com>
Signed-off-by: simon-mo <simon.mo@hey.com>

[BugFix] ChunkedLocalAttention is currently not CG compatible (#26034)
Signed-off-by: Lucas Wilkinson <lwilkins@redhat.com>
Signed-off-by: simon-mo <simon.mo@hey.com>
c536881a · Lucas Wilkinson · simon-mo · ebce361c · c536881a
Commit c536881a authored Oct 01, 2025 by Lucas Wilkinson Committed by simon-mo Oct 02, 2025
Hide whitespace changes
Inline Side-by-side

Showing with 5 additions and 3 deletions

vllm/attention/layers/chunked_local_attention.py vllm/attention/layers/chunked_local_attention.py +5 -3

No files found.
--- a/vllm/attention/layers/chunked_local_attention.py
+++ b/vllm/attention/layers/chunked_local_attention.py
 # SPDX-License-Identifier: Apache-2.0
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
 import functools
-from typing import List, Optional
+from typing import ClassVar, List, Optional

 import torch

@@ -11,8 +11,8 @@ from vllm.attention.backends.abstract import (AttentionBackend,
 from vllm.attention.selector import get_attn_backend
 from vllm.config import CacheConfig, QuantizationConfig
 from vllm.v1.attention.backends.utils import (
-    CommonAttentionMetadata, make_local_attention_virtual_batches,
-    subclass_attention_backend)
+    AttentionCGSupport, CommonAttentionMetadata,
+    make_local_attention_virtual_batches, subclass_attention_backend)

 from ..layer import Attention

@@ -28,6 +28,8 @@ def create_chunked_local_attention_backend(
    underlying_builder = underlying_attn_backend.get_builder_cls()

    class ChunkedLocalAttentionBuilder(underlying_builder):  # type: ignore
+        cudagraph_support: ClassVar[AttentionCGSupport] = \
+            AttentionCGSupport.NEVER

        def build(self,
                  common_prefix_len: int,