[CI Failure] Disable FlashInfer RoPE to unblock CI (#25299)

Signed-off-by: mgoin <mgoin64@gmail.com>

[CI Failure] Disable FlashInfer RoPE to unblock CI (#25299)
Signed-off-by: mgoin <mgoin64@gmail.com>
e08a3a3f · Michael Goin · GitHub · 3d9a1d2d · e08a3a3f
Unverified commit e08a3a3f, authored Sep 20, 2025 by Michael Goin; committed by GitHub on Sep 20, 2025
Hide whitespace changes
Inline Side-by-side

Showing with 7 additions and 7 deletions

vllm/model_executor/layers/rotary_embedding/base.py → vllm/model_executor/layers/rotary_embedding/base.py (+7 -7)

No files found.
--- a/vllm/model_executor/layers/rotary_embedding/base.py
+++ b/vllm/model_executor/layers/rotary_embedding/base.py
@@ -6,8 +6,6 @@ from typing import Optional
 import torch
 from vllm.model_executor.custom_op import CustomOp
-from vllm.platforms import current_platform
-from vllm.utils.flashinfer import has_flashinfer
 from .common import apply_rotary_emb_torch
@@ -32,13 +30,15 @@ class RotaryEmbedding(CustomOp):
        self.base = base
        self.is_neox_style = is_neox_style
        self.dtype = dtype
+        # TODO(mgoin): disabled for now due to failures
        # Flashinfer only supports head_size=64, 128, 256, 512.
        # https://github.com/flashinfer-ai/flashinfer/blob/ebfd655efe830048dba5d582aaa61d61d1cf9a87/include/flashinfer/utils.cuh#L174-L202
-        self.use_flashinfer = (self.enabled()
+        # self.use_flashinfer = (self.enabled()
-                               and dtype in (torch.float16, torch.bfloat16)
+        #                        and dtype in (torch.float16, torch.bfloat16)
-                               and current_platform.is_cuda()
+        #                        and current_platform.is_cuda()
-                               and has_flashinfer()
+        #                        and has_flashinfer()
-                               and self.head_size in [64, 128, 256, 512])
+        #                        and self.head_size in [64, 128, 256, 512])
+        self.use_flashinfer = False
        cache = self._compute_cos_sin_cache()
        if not self.use_flashinfer: