Unverified commit 96fcd3c2, authored by BingjiaWang, committed by GitHub
Browse files

[Misc] Support qwen3-next lora (#31719)

parent 1f214290
...@@ -145,7 +145,13 @@ class Qwen3NextSparseMoeBlock(nn.Module): ...@@ -145,7 +145,13 @@ class Qwen3NextSparseMoeBlock(nn.Module):
prefix=f"{prefix}.gate", prefix=f"{prefix}.gate",
) )
self.shared_expert_gate = torch.nn.Linear(config.hidden_size, 1, bias=False) self.shared_expert_gate = ReplicatedLinear(
config.hidden_size,
1,
bias=False,
quant_config=None,
prefix=f"{prefix}.shared_expert_gate",
)
if config.shared_expert_intermediate_size > 0: if config.shared_expert_intermediate_size > 0:
self.shared_expert = Qwen3NextMLP( self.shared_expert = Qwen3NextMLP(
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment