Add triton_key import and call to the ROCm flash attention backend

62920e37 · zhuwenwen · abf008ef · 62920e37
Commit 62920e37 authored Apr 29, 2025 by zhuwenwen
Hide whitespace changes
Inline Side-by-side

Showing 1 changed file with 4 additions and 1 deletion

vllm/attention/backends/rocm_flash_attn.py vllm/attention/backends/rocm_flash_attn.py +4 -1

No files found.
--- a/vllm/attention/backends/rocm_flash_attn.py
+++ b/vllm/attention/backends/rocm_flash_attn.py
@@ -5,6 +5,8 @@ from dataclasses import dataclass
 from typing import TYPE_CHECKING, Any, Dict, List, Optional, Tuple, Type
 import torch
+import triton
+from triton.compiler.compiler import triton_key
 import vllm.envs as envs
 from vllm import _custom_ops as ops
@@ -30,7 +32,7 @@ _ON_NAVI = "gfx1" in _GPU_ARCH
 _ON_MI250_MI300 = any(arch in _GPU_ARCH
                      for arch in ["gfx90a", "gfx940", "gfx941", "gfx942"])
 class ROCmFlashAttentionBackend(AttentionBackend):
    @staticmethod
@@ -778,6 +780,7 @@ class ROCmFlashAttentionImpl(AttentionImpl):
            else:
                # prefix-enabled attention
                # not applicable for encoder-only models
+                version_key = triton_key()
                if self.attn_type != AttentionType.ENCODER_ONLY:
                    output[:
                           num_prefill_tokens] = PagedAttention.forward_prefix(