"docs/source/features/quantization/bnb.md" did not exist on "9c93636d84414591ae4d7b9c1174af7e91052fd8"
Unverified commit f3c7941e, authored by Kai Song, committed by GitHub
Browse files

[Bugfix]Fix EP precision for Qwen3.5, Qwen3-Next (#39181)


Signed-off-by: Song Kai <songkai05@baidu.com>
parent 3352bf8b
......@@ -80,6 +80,7 @@ class Qwen2MoeMLP(nn.Module):
quant_config: QuantizationConfig | None = None,
reduce_results: bool = True,
expert_gate: torch.nn.Linear | None = None,
is_sequence_parallel: bool = False,
prefix: str = "",
) -> None:
super().__init__()
......@@ -88,6 +89,7 @@ class Qwen2MoeMLP(nn.Module):
[intermediate_size] * 2,
bias=False,
quant_config=quant_config,
disable_tp=is_sequence_parallel,
prefix=f"{prefix}.gate_up_proj",
)
self.down_proj = RowParallelLinear(
......@@ -96,6 +98,7 @@ class Qwen2MoeMLP(nn.Module):
bias=False,
quant_config=quant_config,
reduce_results=reduce_results,
disable_tp=is_sequence_parallel,
prefix=f"{prefix}.down_proj",
)
if hidden_act != "silu":
......
......@@ -140,6 +140,7 @@ class Qwen3NextSparseMoeBlock(nn.Module):
quant_config=quant_config,
reduce_results=False,
expert_gate=self.shared_expert_gate,
is_sequence_parallel=self.is_sequence_parallel,
prefix=f"{prefix}.shared_expert",
)
else:
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment