[BUGFIX] Add missing remapping of the names of fp8 kv-scale (#32199)

Signed-off-by: Vadim Gimpelson <vadim.gimpelson@gmail.com>

[BUGFIX] Add missing remapping of the names of fp8 kv-scale (#32199)
Signed-off-by: Vadim Gimpelson <vadim.gimpelson@gmail.com>
9f430c94 · Vadim Gimpelson · GitHub · f8bd8394 · 9f430c94
Unverified Commit 9f430c94 authored Jan 13, 2026 by Vadim Gimpelson Committed by GitHub Jan 12, 2026
Show whitespace changes
Inline Side-by-side

Showing with 7 additions and 0 deletions

vllm/model_executor/models/qwen3_next.py vllm/model_executor/models/qwen3_next.py +7 -0

No files found.
--- a/vllm/model_executor/models/qwen3_next.py
+++ b/vllm/model_executor/models/qwen3_next.py
@@ -64,6 +64,7 @@ from vllm.model_executor.layers.vocab_parallel_embedding import (
 )
 from vllm.model_executor.model_loader.weight_utils import (
    default_weight_loader,
+    maybe_remap_kv_scale_name,
    sharded_weight_loader,
 )
 from vllm.model_executor.models.qwen2_moe import Qwen2MoeMLP as Qwen3NextMLP
@@ -1065,6 +1066,12 @@ class Qwen3NextModel(nn.Module):
            if name.startswith("mtp."):
                continue
+            # Remapping the name of FP8 kv-scale.
+            if name.endswith("scale"):
+                name = maybe_remap_kv_scale_name(name, params_dict)
+                if name is None:
+                    continue
            for param_name, weight_name, shard_id in stacked_params_mapping:
                if weight_name not in name:
                    continue