Merge branch 'v0.9.2-dev' into v0.9.2-dev-ds

0ff29dbf · zhuwenwen · e0ba23b5 · 8c0143db · 0ff29dbf · 0ff29dbf
Commit 0ff29dbf authored Oct 05, 2025 by zhuwenwen
Hide whitespace changes
Inline Side-by-side

Showing 2 changed files with 10 additions and 3 deletions

vllm/model_executor/layers/rotary_embedding.py vllm/model_executor/layers/rotary_embedding.py +2 -3

vllm/platforms/rocm.py vllm/platforms/rocm.py +8 -0

No files found.
--- a/vllm/model_executor/layers/rotary_embedding.py
+++ b/vllm/model_executor/layers/rotary_embedding.py
@@ -916,7 +916,7 @@ class DeepseekScalingRotaryEmbedding(RotaryEmbedding):
    direct_register_custom_op(
        op_name="rotary_embedding_deepseek_fuse",
        op_func=rotary_embedding_deepseek_fuse,
-        mutates_args=[], 
+        mutates_args=["query", "key"], 
        fake_impl=rotary_embedding_deepseek_fuse_fake,
    )
    
@@ -958,8 +958,7 @@ class DeepseekScalingRotaryEmbedding(RotaryEmbedding):
                    BLOCK_SIZE=BLOCK_SIZE,
                    num_warps=1)

-            # if envs.VLLM_USE_LIGHTOP:
-            if False:
+            if envs.VLLM_USE_LIGHTOP:
                torch.ops.vllm.rotary_embedding_deepseek_fuse(positions, query, key, self.head_size, self.cos_sin_cache, self.is_neox_style)
            else:
                call(query)

--- a/vllm/platforms/rocm.py
+++ b/vllm/platforms/rocm.py
@@ -16,6 +16,14 @@ from vllm.utils import cuda_device_count_stateless

 from .interface import DeviceCapability, Platform, PlatformEnum, _Backend

+from vllm.utils import SUPPORT_TC
+
+if not SUPPORT_TC:
+    os.environ['VLLM_USE_V1'] = '0'
+    os.environ['VLLM_USE_FLASH_ATTN_PA'] = '0'
+    os.environ['VLLM_USE_FLASH_MLA'] = '0'
+    
+
 if TYPE_CHECKING:
    from vllm.config import ModelConfig, VllmConfig