Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
b42566f4
Unverified
Commit
b42566f4
authored
Sep 15, 2025
by
Wentao Ye
Committed by
GitHub
Sep 15, 2025
Browse files
[Bug] Fix `is_flashmla_supported` Check Error (#24774)
Signed-off-by:
yewentao256
<
zhyanwentao@126.com
>
parent
d96e1116
Changes
2
Show whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
4 additions
and
26 deletions
+4
-26
vllm/attention/backends/flashmla.py
vllm/attention/backends/flashmla.py
+2
-13
vllm/v1/attention/backends/mla/flashmla.py
vllm/v1/attention/backends/mla/flashmla.py
+2
-13
No files found.
vllm/attention/backends/flashmla.py
View file @
b42566f4
...
@@ -17,7 +17,6 @@ from vllm.attention.backends.mla.common import (MLACommonBackend,
...
@@ -17,7 +17,6 @@ from vllm.attention.backends.mla.common import (MLACommonBackend,
from
vllm.attention.ops.flashmla
import
(
flash_mla_with_kvcache
,
from
vllm.attention.ops.flashmla
import
(
flash_mla_with_kvcache
,
get_mla_metadata
,
get_mla_metadata
,
is_flashmla_supported
)
is_flashmla_supported
)
from
vllm.platforms.cuda
import
CudaPlatform
class
FlashMLABackend
(
MLACommonBackend
):
class
FlashMLABackend
(
MLACommonBackend
):
...
@@ -179,18 +178,8 @@ class FlashMLAImpl(MLACommonImpl[FlashMLAMetadata]):
...
@@ -179,18 +178,8 @@ class FlashMLAImpl(MLACommonImpl[FlashMLAMetadata]):
logits_soft_cap
,
attn_type
,
logits_soft_cap
,
attn_type
,
kv_sharing_target_layer_name
,
**
mla_args
)
kv_sharing_target_layer_name
,
**
mla_args
)
assert
is_flashmla_supported
(),
\
is_supported
,
reason
=
is_flashmla_supported
()
"FlashMLA is not supported on this device"
assert
is_supported
,
reason
# disallow FlashMLA on NVIDIA Blackwell (SM 10.0+) GPUs
# context:
# https://github.com/deepseek-ai/FlashMLA/issues/83
# https://github.com/vllm-project/vllm/issues/24513
if
CudaPlatform
.
has_device_capability
(
100
):
raise
NotImplementedError
(
"FlashMLA is temporarily disabled on Blackwell (SM 10.0). "
"Please use CUTLASS_MLA or TRITON_MLA instead. "
"Example: `export VLLM_ATTENTION_BACKEND=CUTLASS_MLA`"
)
unsupported_features
=
[
alibi_slopes
,
sliding_window
,
logits_soft_cap
]
unsupported_features
=
[
alibi_slopes
,
sliding_window
,
logits_soft_cap
]
if
any
(
unsupported_features
):
if
any
(
unsupported_features
):
...
...
vllm/v1/attention/backends/mla/flashmla.py
View file @
b42566f4
...
@@ -12,7 +12,6 @@ from vllm.attention.ops.flashmla import (flash_mla_with_kvcache,
...
@@ -12,7 +12,6 @@ from vllm.attention.ops.flashmla import (flash_mla_with_kvcache,
is_flashmla_supported
)
is_flashmla_supported
)
from
vllm.config
import
VllmConfig
from
vllm.config
import
VllmConfig
from
vllm.logger
import
init_logger
from
vllm.logger
import
init_logger
from
vllm.platforms.cuda
import
CudaPlatform
from
vllm.v1.attention.backends.mla.common
import
(
MLACommonBackend
,
from
vllm.v1.attention.backends.mla.common
import
(
MLACommonBackend
,
MLACommonDecodeMetadata
,
MLACommonDecodeMetadata
,
MLACommonImpl
,
MLACommonImpl
,
...
@@ -156,18 +155,8 @@ class FlashMLAImpl(MLACommonImpl[FlashMLAMetadata]):
...
@@ -156,18 +155,8 @@ class FlashMLAImpl(MLACommonImpl[FlashMLAMetadata]):
logits_soft_cap
,
attn_type
,
logits_soft_cap
,
attn_type
,
kv_sharing_target_layer_name
,
**
mla_args
)
kv_sharing_target_layer_name
,
**
mla_args
)
assert
is_flashmla_supported
(),
\
is_supported
,
reason
=
is_flashmla_supported
()
"FlashMLA is not supported on this device"
assert
is_supported
,
reason
# disallow FlashMLA on NVIDIA Blackwell (SM 10.0+) GPUs
# context:
# https://github.com/deepseek-ai/FlashMLA/issues/83
# https://github.com/vllm-project/vllm/issues/24513
if
CudaPlatform
.
has_device_capability
(
100
):
raise
NotImplementedError
(
"FlashMLA is temporarily disabled on Blackwell (SM 10.0). "
"Please use CUTLASS_MLA or TRITON_MLA instead. "
"Example: `export VLLM_ATTENTION_BACKEND=CUTLASS_MLA`"
)
unsupported_features
=
[
alibi_slopes
,
sliding_window
,
logits_soft_cap
]
unsupported_features
=
[
alibi_slopes
,
sliding_window
,
logits_soft_cap
]
if
any
(
unsupported_features
):
if
any
(
unsupported_features
):
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment