Unverified commit 1e5ad9b7, authored by Jee Jee Li, committed by GitHub
Browse files

[Bugfix] Fix Qwen3NextForCausalLM packed_modules_mapping (#35413)


Signed-off-by: Jee Jee Li <pandaleefree@gmail.com>
parent cabdaa76
...@@ -412,6 +412,8 @@ class Qwen3NextGatedDeltaNet(nn.Module, MambaBase): ...@@ -412,6 +412,8 @@ class Qwen3NextGatedDeltaNet(nn.Module, MambaBase):
prefix=f"{prefix}.in_proj_qkvz", prefix=f"{prefix}.in_proj_qkvz",
) )
# ba_proj doesn't support blockwise fp8 quantization. # ba_proj doesn't support blockwise fp8 quantization.
# # in_proj_ba is defined as MergedColumnParallelLinear for
# compatibility with Qwen3_5.
self.in_proj_ba = MergedColumnParallelLinear( self.in_proj_ba = MergedColumnParallelLinear(
input_size=self.hidden_size, input_size=self.hidden_size,
output_sizes=[self.num_v_heads] * 2, output_sizes=[self.num_v_heads] * 2,
...@@ -1326,6 +1328,8 @@ class Qwen3NextForCausalLM( ...@@ -1326,6 +1328,8 @@ class Qwen3NextForCausalLM(
"v_proj", "v_proj",
], ],
"gate_up_proj": ["gate_proj", "up_proj"], "gate_up_proj": ["gate_proj", "up_proj"],
"in_proj_qkvz": ["in_proj_qkvz"],
"in_proj_ba": ["in_proj_ba"],
} }
def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""): def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""):
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment