Commit 1dc7e73e authored by Jiezhong Qiu
Browse files

It seems that solution 1 is still the fastest

i.e., expand bias using torch.repeat_interleave directly
parent c65039da
...@@ -70,13 +70,13 @@ class FMoELinear(nn.Module): ...@@ -70,13 +70,13 @@ class FMoELinear(nn.Module):
# like MOELinear.apply(x, weight, bias, count) # like MOELinear.apply(x, weight, bias, count)
# Solution 1 # Solution 1
# bias = torch.repeat_interleave(self.bias, bias = torch.repeat_interleave(self.bias,
# fwd_expert_count.to(self.bias.device), dim=0) fwd_expert_count.to(self.bias.device), dim=0)
# Solution 2 # Solution 2
bias_idx = torch.arange(self.num_expert)\ # bias_idx = torch.arange(self.num_expert)\
.repeat_interleave(fwd_expert_count) # .repeat_interleave(fwd_expert_count)
bias = self.bias[bias_idx] # bias = self.bias[bias_idx]
# Solution 3 # Solution 3
# bias = [] # bias = []
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment