"vscode:/vscode.git/clone" did not exist on "75648b16ddce1bff02c39c6f06be62a58385ff52"
Unverified commit a9e532af, authored by tvirolai-amd, committed by GitHub
Browse files

[ROCm][Perf] Allow MTP lens > 1 in Sparse MLA (#36681)


Signed-off-by: Teemu Virolainen <teemu.virolainen@amd.com>
parent f3163bba
......@@ -214,11 +214,15 @@ class SpecDecodeBaseProposer:
# Determine allowed attention backends once during initialization.
self.allowed_attn_types: tuple | None = None
if current_platform.is_rocm():
from vllm.v1.attention.backends.mla.rocm_aiter_mla_sparse import (
ROCMAiterMLASparseMetadata,
)
from vllm.v1.attention.backends.rocm_attn import RocmAttentionMetadata
rocm_types = [
TritonAttentionMetadata,
RocmAttentionMetadata,
ROCMAiterMLASparseMetadata,
]
# ROCM_AITER_FA is an optional backend
# We check is_enabled() here to avoid importing the backend module during
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment