Unverified commit 6fff24f3, authored by Linda and committed by GitHub
Browse files

[Bugfix] Qwen3.5 kv-scale weight remapping (#34719)


Signed-off-by: Linda-Stadter <57756729+Linda-Stadter@users.noreply.github.com>
parent 23210a91
...@@ -57,6 +57,7 @@ from vllm.model_executor.layers.vocab_parallel_embedding import ( ...@@ -57,6 +57,7 @@ from vllm.model_executor.layers.vocab_parallel_embedding import (
) )
from vllm.model_executor.model_loader.weight_utils import ( from vllm.model_executor.model_loader.weight_utils import (
default_weight_loader, default_weight_loader,
maybe_remap_kv_scale_name,
) )
from vllm.multimodal import MULTIMODAL_REGISTRY from vllm.multimodal import MULTIMODAL_REGISTRY
from vllm.sequence import IntermediateTensors from vllm.sequence import IntermediateTensors
...@@ -397,6 +398,12 @@ class Qwen3_5Model(Qwen3NextModel): ...@@ -397,6 +398,12 @@ class Qwen3_5Model(Qwen3NextModel):
if name.startswith("mtp."): if name.startswith("mtp."):
continue continue
# Remapping the name of FP8 kv-scale.
if name.endswith("scale"):
name = maybe_remap_kv_scale_name(name, params_dict)
if name is None:
continue
for param_name, weight_name, shard_id in stacked_params_mapping: for param_name, weight_name, shard_id in stacked_params_mapping:
if "experts.gate_up_proj" in name or "experts.down_proj" in name: if "experts.gate_up_proj" in name or "experts.down_proj" in name:
is_fused_expert = True is_fused_expert = True
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment