Commit e5f2ff72 authored by zhuwenwen
Browse files

skip aiter

parent 0386844b
......@@ -1943,7 +1943,6 @@ class FusedMoE(CustomOp):
topk_weights, topk_ids = self.router.select_experts(
hidden_states=x_orig,
router_logits=router_logits,
# use_fused_gate=use_fused_gate,
)
if self.capture is not None:
......
......@@ -167,36 +167,36 @@ class SpecDecodeBaseProposer:
# Determine allowed attention backends once during initialization.
self.allowed_attn_types: tuple | None = None
if current_platform.is_rocm():
from vllm.v1.attention.backends.rocm_attn import RocmAttentionMetadata
# if current_platform.is_rocm():
# from vllm.v1.attention.backends.rocm_attn import RocmAttentionMetadata
rocm_types = [
TritonAttentionMetadata,
RocmAttentionMetadata,
]
# ROCM_AITER_FA is an optional backend
if find_spec(
AttentionBackendEnum.ROCM_AITER_FA.get_path(include_classname=False)
):
from vllm.v1.attention.backends.rocm_aiter_fa import (
AiterFlashAttentionMetadata,
)
# rocm_types = [
# TritonAttentionMetadata,
# RocmAttentionMetadata,
# ]
# # ROCM_AITER_FA is an optional backend
# if find_spec(
# AttentionBackendEnum.ROCM_AITER_FA.get_path(include_classname=False)
# ):
# from vllm.v1.attention.backends.rocm_aiter_fa import (
# AiterFlashAttentionMetadata,
# )
rocm_types.append(AiterFlashAttentionMetadata)
# rocm_types.append(AiterFlashAttentionMetadata)
# TRITON_MLA backend support for MLA models (e.g., DeepSeek)
from vllm.model_executor.layers.attention.mla_attention import (
MLACommonMetadata,
)
# # TRITON_MLA backend support for MLA models (e.g., DeepSeek)
# from vllm.model_executor.layers.attention.mla_attention import (
# MLACommonMetadata,
# )
rocm_types.append(MLACommonMetadata)
# rocm_types.append(MLACommonMetadata)
# FlexAttention backend support
from vllm.v1.attention.backends.flex_attention import FlexAttentionMetadata
# # FlexAttention backend support
# from vllm.v1.attention.backends.flex_attention import FlexAttentionMetadata
rocm_types.append(FlexAttentionMetadata)
# rocm_types.append(FlexAttentionMetadata)
self.allowed_attn_types = tuple(rocm_types)
# self.allowed_attn_types = tuple(rocm_types)
# Parse the speculative token tree.
spec_token_tree = self.speculative_config.speculative_token_tree
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment