Unverified commit a9e532af, authored by tvirolai-amd, committed by GitHub
Browse files

[ROCm][Perf] Allow MTP lens > 1 in Sparse MLA (#36681)


Signed-off-by: Teemu Virolainen <teemu.virolainen@amd.com>
parent f3163bba
@@ -214,11 +214,15 @@ class SpecDecodeBaseProposer:
 # Determine allowed attention backends once during initialization.
 self.allowed_attn_types: tuple | None = None
 if current_platform.is_rocm():
+from vllm.v1.attention.backends.mla.rocm_aiter_mla_sparse import (
+ROCMAiterMLASparseMetadata,
+)
 from vllm.v1.attention.backends.rocm_attn import RocmAttentionMetadata
 rocm_types = [
 TritonAttentionMetadata,
 RocmAttentionMetadata,
+ROCMAiterMLASparseMetadata,
 ]
 # ROCM_AITER_FA is an optional backend
 # We check is_enabled() here to avoid importing the backend module during
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment