[Bugfix][ROCm] Fix ViT rotary embeddings for torch.compile compatibility on ROCm (#27748)

Signed-off-by: vllmellm <vllm.ellm@embeddedllm.com>

[Bugfix][ROCm] Fix ViT rotary embeddings for torch.compile compatibility on ROCm (#27748)
Signed-off-by: vllmellm <vllm.ellm@embeddedllm.com>
b13a4475 · vllmellm · GitHub · 7956b0c0 · b13a4475 · b13a4475
Unverified Commit b13a4475 authored Nov 04, 2025 by vllmellm Committed by GitHub Nov 03, 2025
Showing with 8 additions and 5 deletions

vllm/model_executor/layers/rotary_embedding/common.py vllm/model_executor/layers/rotary_embedding/common.py +7 -4

vllm/model_executor/models/glm4_1v.py vllm/model_executor/models/glm4_1v.py +1 -1

No files found.
--- a/vllm/model_executor/layers/rotary_embedding/common.py
+++ b/vllm/model_executor/layers/rotary_embedding/common.py
@@ -77,7 +77,11 @@ def dispatch_rotary_emb_function(
    if current_platform.is_cuda():
        return apply_rotary_emb

-    if current_platform.is_rocm():
+    # When torch.compile is not active, use the rotary embedding function
+    # from the flash_attn package. Otherwise, fall back to the naive
+    # PyTorch implementation, which is faster when torch.compile is
+    # enabled.
+    if current_platform.is_rocm() and not torch.compiler.is_compiling():
        if find_spec("flash_attn") is not None:
            from flash_attn.ops.triton.rotary import apply_rotary

@@ -87,11 +91,10 @@ def dispatch_rotary_emb_function(
                "flash_attn is not installed. Falling back to PyTorch "
                "implementation for rotary embeddings."
            )
-
    if default is not None:
        return default
-    else:
-        return apply_rotary_emb_torch
+
+    return apply_rotary_emb_torch


 # yarn functions

--- a/vllm/model_executor/models/glm4_1v.py
+++ b/vllm/model_executor/models/glm4_1v.py
@@ -370,7 +370,7 @@ class Glm4vVisionAttention(nn.Module):
                cu_seqlens_k=cu_seqlens,
                max_seqlen_q=max_seqlen,
                max_seqlen_k=max_seqlen,
-                dropout_p=0,
+                dropout_p=0.0,
                causal=False,
            )