[Bugfix] Fix broken MTP weight loading for FP8 KV Scales (#27227)

Signed-off-by: Benjamin Chislett <bchislett@nvidia.com>

[Bugfix] Fix broken MTP weight loading for FP8 KV Scales (#27227)
Signed-off-by: Benjamin Chislett <bchislett@nvidia.com>
f381cf23 · Benjamin Chislett · GitHub · 5ff5d94e · f381cf23
Unverified commit f381cf23, authored Oct 21, 2025 by Benjamin Chislett; committed by GitHub Oct 20, 2025
Show whitespace changes
Inline Side-by-side

Showing 1 changed file with 8 additions and 1 deletion

vllm/model_executor/models/deepseek_mtp.py vllm/model_executor/models/deepseek_mtp.py +8 -1

No files found.
--- a/vllm/model_executor/models/deepseek_mtp.py
+++ b/vllm/model_executor/models/deepseek_mtp.py
@@ -16,7 +16,10 @@ from vllm.model_executor.layers.vocab_parallel_embedding import (
    ParallelLMHead,
    VocabParallelEmbedding,
 )
-from vllm.model_executor.model_loader.weight_utils import default_weight_loader
+from vllm.model_executor.model_loader.weight_utils import (
+    default_weight_loader,
+    maybe_remap_kv_scale_name,
+)
 from vllm.platforms import current_platform
 from vllm.sequence import IntermediateTensors

@@ -278,6 +281,10 @@ class DeepSeekMTP(nn.Module, SupportsPP):
                    if name.endswith(".bias") and name not in params_dict:
                        continue

+                    name = maybe_remap_kv_scale_name(name, params_dict)
+                    if name is None:
+                        continue
+
                    # According to DeepSeek-V3 Technical Report, MTP modules
                    # shares embedding layer. We only load the first weights.
                    if (