fix cuda graph (#22721)

Signed-off-by: fsx950223 <fsx950223@outlook.com>

fix cuda graph (#22721)
Signed-off-by: fsx950223 <fsx950223@outlook.com>
d983769c · who who who · GitHub · 8fd92092 · d983769c
Unverified commit d983769c, authored Aug 20, 2025 by who who who; committed by GitHub on Aug 20, 2025
Show whitespace changes
Inline Side-by-side

Showing 1 changed file with 4 additions and 3 deletions

vllm/v1/attention/backends/rocm_aiter_fa.py vllm/v1/attention/backends/rocm_aiter_fa.py +4 -3

No files found.
--- a/vllm/v1/attention/backends/rocm_aiter_fa.py
+++ b/vllm/v1/attention/backends/rocm_aiter_fa.py
@@ -2,7 +2,7 @@
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
 """Attention layer with AiterFlashAttention."""
 from dataclasses import dataclass
-from typing import ClassVar, Optional
+from typing import Optional

 import torch

@@ -11,7 +11,8 @@ from vllm.attention.backends.abstract import (AttentionBackend, AttentionImpl,
 from vllm.config import VllmConfig
 from vllm.logger import init_logger
 from vllm.platforms import current_platform
-from vllm.v1.attention.backends.utils import (AttentionMetadataBuilder,
+from vllm.v1.attention.backends.utils import (AttentionCGSupport,
+                                              AttentionMetadataBuilder,
                                              CommonAttentionMetadata)
 from vllm.v1.kv_cache_interface import AttentionSpec

@@ -231,7 +232,7 @@ class AiterFlashAttentionMetadata:

 class AiterFlashAttentionMetadataBuilder(
        AttentionMetadataBuilder[AiterFlashAttentionMetadata]):
-    full_cudagraph_supported: ClassVar[bool] = True
+    cudagraph_support = AttentionCGSupport.ALWAYS

    def __init__(self, kv_cache_spec: AttentionSpec, layer_names: list[str],
                 vllm_config: VllmConfig, device: torch.device):