Unverified commit c3666f56, authored by Jee Jee Li, committed by GitHub
Browse files

[Misc] Fix Qwen2-MoE shared_expert_gate (#31339)


Signed-off-by: Jee Jee Li <pandaleefree@gmail.com>
parent c79dbfa9
...@@ -20,7 +20,6 @@ class LoRARequest( ...@@ -20,7 +20,6 @@ class LoRARequest(
lora_name: str lora_name: str
lora_int_id: int lora_int_id: int
lora_path: str = "" lora_path: str = ""
long_lora_max_len: int | None = None
base_model_name: str | None = msgspec.field(default=None) base_model_name: str | None = msgspec.field(default=None)
tensorizer_config_dict: dict | None = None tensorizer_config_dict: dict | None = None
......
...@@ -111,7 +111,7 @@ class Qwen2MoeMLP(nn.Module): ...@@ -111,7 +111,7 @@ class Qwen2MoeMLP(nn.Module):
out, _ = self.down_proj(out) out, _ = self.down_proj(out)
if self.expert_gate is not None: if self.expert_gate is not None:
out = F.sigmoid(self.expert_gate(x)) * out out = F.sigmoid(self.expert_gate(x)[0]) * out
return out return out
...@@ -140,7 +140,13 @@ class Qwen2MoeSparseMoeBlock(nn.Module): ...@@ -140,7 +140,13 @@ class Qwen2MoeSparseMoeBlock(nn.Module):
prefix=f"{prefix}.gate", prefix=f"{prefix}.gate",
) )
self.shared_expert_gate = torch.nn.Linear(config.hidden_size, 1, bias=False) self.shared_expert_gate = ReplicatedLinear(
config.hidden_size,
1,
bias=False,
quant_config=None,
prefix=f"{prefix}.shared_expert_gate",
)
if config.shared_expert_intermediate_size > 0: if config.shared_expert_intermediate_size > 0:
self.shared_expert = Qwen2MoeMLP( self.shared_expert = Qwen2MoeMLP(
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment