Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
451af742
Commit
451af742
authored
Jan 06, 2026
by
zhuwenwen
Browse files
fix weights_not_loaded
update weights_not_loaded and flash_mla_with_kvcache update paged_mqa_logits
parent
aa05dfd5
Changes
3
Show whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
5 additions
and
3 deletions
+5
-3
vllm/model_executor/model_loader/default_loader.py
vllm/model_executor/model_loader/default_loader.py
+2
-2
vllm/v1/attention/backends/mla/flashmla.py
vllm/v1/attention/backends/mla/flashmla.py
+2
-0
vllm/v1/attention/backends/mla/indexer.py
vllm/v1/attention/backends/mla/indexer.py
+1
-1
No files found.
vllm/model_executor/model_loader/default_loader.py
View file @
451af742
...
...
@@ -313,7 +313,7 @@ class DefaultModelLoader(BaseModelLoader):
# We only enable strict check for non-quantized models
# that have loaded weights tracking currently.
if
model_config
.
quantization
is
None
and
loaded_weights
is
not
None
:
weights_not_loaded
=
weights_t
o
_load
-
loaded_weights
weights_not_loaded
=
{
k
for
k
in
weights_
no
t_load
ed
if
not
k
.
endswith
(
"indexer.weights_proj.bias"
)}
if
weights_not_loaded
:
raise
ValueError
(
"Following weights were not initialized from "
...
...
vllm/v1/attention/backends/mla/flashmla.py
View file @
451af742
...
...
@@ -336,6 +336,8 @@ class FlashMLAImpl(MLACommonImpl[FlashMLAMetadata]):
causal
=
True
,
descale_q
=
layer
.
_q_scale
.
reshape
(
1
),
descale_k
=
layer
.
_k_scale
.
reshape
(
1
),
is_fp8_kvcache
=
False
,
indices
=
None
,
)
o
=
reshape_attn_output_for_spec_decode
(
o
)
...
...
vllm/v1/attention/backends/mla/indexer.py
View file @
451af742
...
...
@@ -315,7 +315,7 @@ class DeepseekV32IndexerMetadataBuilder(AttentionMetadataBuilder):
seq_lens
=
common_attn_metadata
.
seq_lens
[:
num_decodes
]
if
is_deep_gemm_supported
():
if
current_platform
.
is_rocm
():
self
.
scheduler_metadata_buffer
[:]
=
gemmopt
.
get_paged_mqa_logits_metadata
(
self
.
scheduler_metadata_buffer
=
gemmopt
.
get_paged_mqa_logits_metadata
(
seq_lens
,
self
.
kv_cache_spec
.
block_size
,
self
.
num_sms
)
else
:
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment