Unverified commit 11857a00, authored by Matthew Bonanni and committed by GitHub
Browse files

[Attention] Add ROCM_AITER_MLA_SPARSE to attention backend registry (#29103)


Signed-off-by: Matthew Bonanni <mbonanni@redhat.com>
parent 8c25f9cf
...@@ -52,6 +52,9 @@ class AttentionBackendEnum(Enum, metaclass=_AttentionBackendEnumMeta): ...@@ -52,6 +52,9 @@ class AttentionBackendEnum(Enum, metaclass=_AttentionBackendEnumMeta):
ROCM_AITER_FA = ( ROCM_AITER_FA = (
"vllm.v1.attention.backends.rocm_aiter_fa.AiterFlashAttentionBackend" "vllm.v1.attention.backends.rocm_aiter_fa.AiterFlashAttentionBackend"
) )
ROCM_AITER_MLA_SPARSE = (
"vllm.v1.attention.backends.mla.rocm_aiter_mla_sparse.ROCMAiterMLASparseBackend"
)
TORCH_SDPA = "" # this tag is only used for ViT TORCH_SDPA = "" # this tag is only used for ViT
FLASHINFER = "vllm.v1.attention.backends.flashinfer.FlashInferBackend" FLASHINFER = "vllm.v1.attention.backends.flashinfer.FlashInferBackend"
FLASHINFER_MLA = ( FLASHINFER_MLA = (
......
...@@ -233,10 +233,7 @@ class RocmPlatform(Platform): ...@@ -233,10 +233,7 @@ class RocmPlatform(Platform):
"Sparse MLA backend on ROCm only supports block size 1 for now." "Sparse MLA backend on ROCm only supports block size 1 for now."
) )
logger.info_once("Using Sparse MLA backend on V1 engine.") logger.info_once("Using Sparse MLA backend on V1 engine.")
return ( return AttentionBackendEnum.ROCM_AITER_MLA_SPARSE.get_path()
"vllm.v1.attention.backends.mla.rocm_aiter_mla_sparse."
"ROCMAiterMLASparseBackend"
)
if use_mla: if use_mla:
if selected_backend is None: if selected_backend is None:
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment