Update flash-attn interface to support Keye on ROCm (import apply_rotary_emb from flash_attn instead of vllm.vllm_flash_attn)

4d53d14c · zhuwenwen · a5f106eb · 4d53d14c
Commit 4d53d14c authored Sep 09, 2025 by zhuwenwen
Hide whitespace changes
Inline Side-by-side

Showing with 5 additions and 1 deletion

vllm/model_executor/models/keye.py vllm/model_executor/models/keye.py +5 -1

No files found.
--- a/vllm/model_executor/models/keye.py
+++ b/vllm/model_executor/models/keye.py
@@ -54,6 +54,7 @@ from .utils import (AutoWeightsLoader, WeightsMapper,
                    init_vllm_registered_model, is_pp_missing_parameter,
                    maybe_prefix, merge_multimodal_embeddings)
 from .vision import get_vit_attn_backend
+from vllm.platforms import current_platform
 logger = init_logger(__name__)
@@ -330,7 +331,10 @@ def apply_rotary_pos_emb_flashatt(
    cos = cos.chunk(2, dim=-1)[0].contiguous()
    sin = sin.chunk(2, dim=-1)[0].contiguous()
-    from vllm.vllm_flash_attn.layers.rotary import apply_rotary_emb
+    if not current_platform.is_rocm():
+        from vllm.vllm_flash_attn.layers.rotary import apply_rotary_emb
+    else:
+        from flash_attn.layers.rotary import apply_rotary_emb
    q_embed = apply_rotary_emb(q.float(), cos.float(), sin.float()).type_as(q)
    k_embed = apply_rotary_emb(k.float(), cos.float(), sin.float()).type_as(k)