"vscode:/vscode.git/clone" did not exist on "05cae69f0f4f0237779c45a3907aa13ee5a07667"
Unverified Commit 5b044810 authored by Wentao Ye, committed by GitHub
Browse files

[Bug] Raise error explicitly if using incompatible backend (#27424)


Signed-off-by: yewentao256 <zhyanwentao@126.com>
parent f7a66828
......@@ -261,6 +261,21 @@ class CudaPlatformBase(Platform):
from vllm.attention.backends.registry import _Backend
if use_mla:
# explicitly reject non-MLA backends when MLA is enabled to avoid
# silently selecting an incompatible backend (e.g., FLASHINFER).
if selected_backend in {
_Backend.FLASHINFER,
_Backend.FLASH_ATTN,
_Backend.TRITON_ATTN,
_Backend.TREE_ATTN,
_Backend.XFORMERS,
}:
raise ValueError(
f"Attention backend {selected_backend} incompatible with MLA. "
"Please use one of the MLA backends: FLASHINFER_MLA, CUTLASS_MLA, "
"FLASHMLA, FLASH_ATTN_MLA, or TRITON_MLA. Alternatively, set "
"VLLM_MLA_DISABLE=1 to disable MLA for this model."
)
if not use_v1:
raise RuntimeError(
"MLA attention backends require the V1 engine. "
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment