Commit e5f2ff72 authored by zhuwenwen
Browse files

skip aiter

parent 0386844b
...@@ -1943,7 +1943,6 @@ class FusedMoE(CustomOp): ...@@ -1943,7 +1943,6 @@ class FusedMoE(CustomOp):
topk_weights, topk_ids = self.router.select_experts( topk_weights, topk_ids = self.router.select_experts(
hidden_states=x_orig, hidden_states=x_orig,
router_logits=router_logits, router_logits=router_logits,
# use_fused_gate=use_fused_gate,
) )
if self.capture is not None: if self.capture is not None:
......
...@@ -167,36 +167,36 @@ class SpecDecodeBaseProposer: ...@@ -167,36 +167,36 @@ class SpecDecodeBaseProposer:
# Determine allowed attention backends once during initialization. # Determine allowed attention backends once during initialization.
self.allowed_attn_types: tuple | None = None self.allowed_attn_types: tuple | None = None
if current_platform.is_rocm(): # if current_platform.is_rocm():
from vllm.v1.attention.backends.rocm_attn import RocmAttentionMetadata # from vllm.v1.attention.backends.rocm_attn import RocmAttentionMetadata
rocm_types = [
TritonAttentionMetadata,
RocmAttentionMetadata,
]
# ROCM_AITER_FA is an optional backend
if find_spec(
AttentionBackendEnum.ROCM_AITER_FA.get_path(include_classname=False)
):
from vllm.v1.attention.backends.rocm_aiter_fa import (
AiterFlashAttentionMetadata,
)
rocm_types.append(AiterFlashAttentionMetadata) # rocm_types = [
# TritonAttentionMetadata,
# RocmAttentionMetadata,
# ]
# # ROCM_AITER_FA is an optional backend
# if find_spec(
# AttentionBackendEnum.ROCM_AITER_FA.get_path(include_classname=False)
# ):
# from vllm.v1.attention.backends.rocm_aiter_fa import (
# AiterFlashAttentionMetadata,
# )
# TRITON_MLA backend support for MLA models (e.g., DeepSeek) # rocm_types.append(AiterFlashAttentionMetadata)
from vllm.model_executor.layers.attention.mla_attention import (
MLACommonMetadata, # # TRITON_MLA backend support for MLA models (e.g., DeepSeek)
) # from vllm.model_executor.layers.attention.mla_attention import (
# MLACommonMetadata,
# )
rocm_types.append(MLACommonMetadata) # rocm_types.append(MLACommonMetadata)
# FlexAttention backend support # # FlexAttention backend support
from vllm.v1.attention.backends.flex_attention import FlexAttentionMetadata # from vllm.v1.attention.backends.flex_attention import FlexAttentionMetadata
rocm_types.append(FlexAttentionMetadata) # rocm_types.append(FlexAttentionMetadata)
self.allowed_attn_types = tuple(rocm_types) # self.allowed_attn_types = tuple(rocm_types)
# Parse the speculative token tree. # Parse the speculative token tree.
spec_token_tree = self.speculative_config.speculative_token_tree spec_token_tree = self.speculative_config.speculative_token_tree
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment