Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
a7be77be
Unverified
Commit
a7be77be
authored
Feb 05, 2026
by
Chauncey
Committed by
GitHub
Feb 05, 2026
Browse files
[Bugfix] fix DeepSeek R1 with CUTLASS MLA Broken on B200 (#33637)
Signed-off-by:
chaunceyjiang
<
chaunceyjiang@gmail.com
>
parent
bbe0574d
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
1 addition
and
4 deletions
+1
-4
vllm/model_executor/layers/attention/mla_attention.py
vllm/model_executor/layers/attention/mla_attention.py
+1
-4
No files found.
vllm/model_executor/layers/attention/mla_attention.py
View file @
a7be77be
...
@@ -293,7 +293,6 @@ class MLAAttention(nn.Module, AttentionLayerBase):
...
@@ -293,7 +293,6 @@ class MLAAttention(nn.Module, AttentionLayerBase):
prefix
:
str
=
""
,
prefix
:
str
=
""
,
use_sparse
:
bool
=
False
,
use_sparse
:
bool
=
False
,
indexer
:
object
|
None
=
None
,
indexer
:
object
|
None
=
None
,
q_pad_num_heads
:
int
|
None
=
None
,
**
extra_impl_args
,
**
extra_impl_args
,
):
):
super
().
__init__
()
super
().
__init__
()
...
@@ -308,7 +307,6 @@ class MLAAttention(nn.Module, AttentionLayerBase):
...
@@ -308,7 +307,6 @@ class MLAAttention(nn.Module, AttentionLayerBase):
self
.
head_size
=
kv_lora_rank
+
qk_rope_head_dim
self
.
head_size
=
kv_lora_rank
+
qk_rope_head_dim
self
.
layer_name
=
prefix
self
.
layer_name
=
prefix
self
.
indexer
=
indexer
self
.
indexer
=
indexer
self
.
q_pad_num_heads
=
q_pad_num_heads
self
.
num_kv_heads
=
1
self
.
num_kv_heads
=
1
self
.
qk_head_dim
=
self
.
qk_nope_head_dim
+
self
.
qk_rope_head_dim
self
.
qk_head_dim
=
self
.
qk_nope_head_dim
+
self
.
qk_rope_head_dim
...
@@ -375,10 +373,9 @@ class MLAAttention(nn.Module, AttentionLayerBase):
...
@@ -375,10 +373,9 @@ class MLAAttention(nn.Module, AttentionLayerBase):
v_head_dim
=
self
.
v_head_dim
,
v_head_dim
=
self
.
v_head_dim
,
kv_b_proj
=
kv_b_proj
,
kv_b_proj
=
kv_b_proj
,
indexer
=
indexer
,
indexer
=
indexer
,
q_pad_num_heads
=
q_pad_num_heads
,
**
extra_impl_args
,
**
extra_impl_args
,
)
)
self
.
q_pad_num_heads
=
getattr
(
self
.
impl
,
"q_pad_num_heads"
,
None
)
self
.
use_direct_call
=
not
current_platform
.
opaque_attention_op
()
self
.
use_direct_call
=
not
current_platform
.
opaque_attention_op
()
compilation_config
=
get_current_vllm_config
().
compilation_config
compilation_config
=
get_current_vllm_config
().
compilation_config
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment