update weights_not_loaded and flash_mla_with_kvcache

a3f4b5b8 · zhuwenwen · d89f7579 · a3f4b5b8 · a3f4b5b8
Commit a3f4b5b8 authored Dec 22, 2025 by zhuwenwen
Showing with 3 additions and 0 deletions

vllm/model_executor/model_loader/default_loader.py vllm/model_executor/model_loader/default_loader.py +1 -0

vllm/v1/attention/backends/mla/flashmla.py vllm/v1/attention/backends/mla/flashmla.py +2 -0

No files found.
--- a/vllm/model_executor/model_loader/default_loader.py
+++ b/vllm/model_executor/model_loader/default_loader.py
@@ -272,6 +272,7 @@ class DefaultModelLoader(BaseModelLoader):
        # that have loaded weights tracking currently.
        if model_config.quantization is None and loaded_weights is not None:
            weights_not_loaded = weights_to_load - loaded_weights
+            weights_not_loaded = {k for k in weights_not_loaded if not k.endswith("indexer.weights_proj.bias")}
            if weights_not_loaded:
                raise ValueError("Following weights were not initialized from "
                                 f"checkpoint: {weights_not_loaded}")
--- a/vllm/v1/attention/backends/mla/flashmla.py
+++ b/vllm/v1/attention/backends/mla/flashmla.py
@@ -210,6 +210,8 @@ class FlashMLAImpl(MLACommonImpl[FlashMLAMetadata]):
            causal=True,
            descale_q=layer._q_scale.reshape(1),
            descale_k=layer._k_scale.reshape(1),
+            is_fp8_kvcache=False,
+            indices=None,
        )

        return o, lse