Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
2a03f93d
Unverified
Commit
2a03f93d
authored
Oct 08, 2025
by
Matthew Bonanni
Committed by
GitHub
Oct 08, 2025
Browse files
[Attention] Register FLASHMLA_SPARSE (#26441)
Signed-off-by: Matthew Bonanni <mbonanni@redhat.com>
parent
da364615
Changes
2
Show whitespace changes
Inline
Side-by-side
Showing 2 changed files with 3 additions and 1 deletion
+3
-1
vllm/attention/backends/registry.py
vllm/attention/backends/registry.py
+2
-0
vllm/v1/attention/backends/mla/flashmla_sparse.py
vllm/v1/attention/backends/mla/flashmla_sparse.py
+1
-1
No files found.
vllm/attention/backends/registry.py
View file @
2a03f93d
...
@@ -21,6 +21,7 @@ class _Backend(enum.Enum):
...
@@ -21,6 +21,7 @@ class _Backend(enum.Enum):
TRITON_MLA
=
enum
.
auto
()
TRITON_MLA
=
enum
.
auto
()
CUTLASS_MLA
=
enum
.
auto
()
CUTLASS_MLA
=
enum
.
auto
()
FLASHMLA
=
enum
.
auto
()
FLASHMLA
=
enum
.
auto
()
FLASHMLA_SPARSE
=
enum
.
auto
()
FLASH_ATTN_MLA
=
enum
.
auto
()
FLASH_ATTN_MLA
=
enum
.
auto
()
PALLAS
=
enum
.
auto
()
PALLAS
=
enum
.
auto
()
IPEX
=
enum
.
auto
()
IPEX
=
enum
.
auto
()
...
@@ -43,6 +44,7 @@ BACKEND_MAP = {
...
@@ -43,6 +44,7 @@ BACKEND_MAP = {
_Backend
.
TRITON_MLA
:
"vllm.v1.attention.backends.mla.triton_mla.TritonMLABackend"
,
# noqa: E501
_Backend
.
TRITON_MLA
:
"vllm.v1.attention.backends.mla.triton_mla.TritonMLABackend"
,
# noqa: E501
_Backend
.
CUTLASS_MLA
:
"vllm.v1.attention.backends.mla.cutlass_mla.CutlassMLABackend"
,
# noqa: E501
_Backend
.
CUTLASS_MLA
:
"vllm.v1.attention.backends.mla.cutlass_mla.CutlassMLABackend"
,
# noqa: E501
_Backend
.
FLASHMLA
:
"vllm.v1.attention.backends.mla.flashmla.FlashMLABackend"
,
# noqa: E501
_Backend
.
FLASHMLA
:
"vllm.v1.attention.backends.mla.flashmla.FlashMLABackend"
,
# noqa: E501
_Backend
.
FLASHMLA_SPARSE
:
"vllm.v1.attention.backends.mla.flashmla_sparse.FlashMLASparseBackend"
,
# noqa: E501
_Backend
.
FLASH_ATTN_MLA
:
"vllm.v1.attention.backends.mla.flashattn_mla.FlashAttnMLABackend"
,
# noqa: E501
_Backend
.
FLASH_ATTN_MLA
:
"vllm.v1.attention.backends.mla.flashattn_mla.FlashAttnMLABackend"
,
# noqa: E501
_Backend
.
PALLAS
:
"vllm.v1.attention.backends.pallas.PallasAttentionBackend"
,
# noqa: E501
_Backend
.
PALLAS
:
"vllm.v1.attention.backends.pallas.PallasAttentionBackend"
,
# noqa: E501
_Backend
.
FLEX_ATTENTION
:
"vllm.v1.attention.backends.flex_attention.FlexAttentionBackend"
,
# noqa: E501
_Backend
.
FLEX_ATTENTION
:
"vllm.v1.attention.backends.flex_attention.FlexAttentionBackend"
,
# noqa: E501
...
...
vllm/v1/attention/backends/mla/flashmla_sparse.py
View file @
2a03f93d
...
@@ -55,7 +55,7 @@ class FlashMLASparseBackend(AttentionBackend):
...
@@ -55,7 +55,7 @@ class FlashMLASparseBackend(AttentionBackend):
@
staticmethod
@
staticmethod
def
get_name
()
->
str
:
def
get_name
()
->
str
:
return
"FLASHMLA_SPARSE
_VLLM_V1
"
return
"FLASHMLA_SPARSE"
@
staticmethod
@
staticmethod
def
get_metadata_cls
()
->
type
[
AttentionMetadata
]:
def
get_metadata_cls
()
->
type
[
AttentionMetadata
]:
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment