"vllm/vscode:/vscode.git/clone" did not exist on "583a90e0055f3fa1fc464b2dd62ea26a7d389ae1"
Commit e5f2ff72 authored by zhuwenwen
Browse files

skip aiter

parent 0386844b
...@@ -1943,7 +1943,6 @@ class FusedMoE(CustomOp): ...@@ -1943,7 +1943,6 @@ class FusedMoE(CustomOp):
topk_weights, topk_ids = self.router.select_experts( topk_weights, topk_ids = self.router.select_experts(
hidden_states=x_orig, hidden_states=x_orig,
router_logits=router_logits, router_logits=router_logits,
# use_fused_gate=use_fused_gate,
) )
if self.capture is not None: if self.capture is not None:
......
...@@ -167,36 +167,36 @@ class SpecDecodeBaseProposer: ...@@ -167,36 +167,36 @@ class SpecDecodeBaseProposer:
# Determine allowed attention backends once during initialization. # Determine allowed attention backends once during initialization.
self.allowed_attn_types: tuple | None = None self.allowed_attn_types: tuple | None = None
if current_platform.is_rocm(): # if current_platform.is_rocm():
from vllm.v1.attention.backends.rocm_attn import RocmAttentionMetadata # from vllm.v1.attention.backends.rocm_attn import RocmAttentionMetadata
rocm_types = [ # rocm_types = [
TritonAttentionMetadata, # TritonAttentionMetadata,
RocmAttentionMetadata, # RocmAttentionMetadata,
] # ]
# ROCM_AITER_FA is an optional backend # # ROCM_AITER_FA is an optional backend
if find_spec( # if find_spec(
AttentionBackendEnum.ROCM_AITER_FA.get_path(include_classname=False) # AttentionBackendEnum.ROCM_AITER_FA.get_path(include_classname=False)
): # ):
from vllm.v1.attention.backends.rocm_aiter_fa import ( # from vllm.v1.attention.backends.rocm_aiter_fa import (
AiterFlashAttentionMetadata, # AiterFlashAttentionMetadata,
) # )
rocm_types.append(AiterFlashAttentionMetadata) # rocm_types.append(AiterFlashAttentionMetadata)
# TRITON_MLA backend support for MLA models (e.g., DeepSeek) # # TRITON_MLA backend support for MLA models (e.g., DeepSeek)
from vllm.model_executor.layers.attention.mla_attention import ( # from vllm.model_executor.layers.attention.mla_attention import (
MLACommonMetadata, # MLACommonMetadata,
) # )
rocm_types.append(MLACommonMetadata) # rocm_types.append(MLACommonMetadata)
# FlexAttention backend support # # FlexAttention backend support
from vllm.v1.attention.backends.flex_attention import FlexAttentionMetadata # from vllm.v1.attention.backends.flex_attention import FlexAttentionMetadata
rocm_types.append(FlexAttentionMetadata) # rocm_types.append(FlexAttentionMetadata)
self.allowed_attn_types = tuple(rocm_types) # self.allowed_attn_types = tuple(rocm_types)
# Parse the speculative token tree. # Parse the speculative token tree.
spec_token_tree = self.speculative_config.speculative_token_tree spec_token_tree = self.speculative_config.speculative_token_tree
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment