Commit bc8e8181 authored by Rick Ho
Browse files

fix expert number

parent 761b4ffc
......@@ -43,7 +43,7 @@ class FFFN(nn.Module):
world_size=world_size)
self.h4toh = FMoE(num_expert, d_hidden, d_model,
world_size=world_size)
- self.gate = nn.Linear(d_model, num_expert)
+ self.gate = nn.Linear(d_model, num_expert * world_size)
self.layer_norm = nn.LayerNorm(d_model)
self.bias = torch.nn.parameter.Parameter(torch.zeros(d_model,
dtype=torch.float32))
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment