"...git@developer.sourcefind.cn:renzhc/diffusers_dcu.git" did not exist on "5237a82a35eea12363d86164fdd93ab8b26f6e7a"
Commit b44de4cd authored by Jiezhong Qiu's avatar Jiezhong Qiu
Browse files

add TODO for bias issue

parent 08e08319
...@@ -61,16 +61,21 @@ class FMoELinear(nn.Module): ...@@ -61,16 +61,21 @@ class FMoELinear(nn.Module):
''' '''
x = MOELinear.apply(inp, self.weight, fwd_expert_count) x = MOELinear.apply(inp, self.weight, fwd_expert_count)
if self.bias is not None: if self.bias is not None:
# TODO: torch.repeat_interleave seems have wrong behavior # TODO: torch.repeat_interleave seems have wrong
# in backward, leading to incorrect gradient for bias. # behaviors in backward, leading to incorrect
# Thus we use a for-loop to manually expand the bias term. # gradient computation for bias.
# Thus we use a for-loop to manually expand the bias.
# This part should finally goes to MOELinear.apply. # This part should finally goes to MOELinear.apply.
#bias = torch.repeat_interleave(self.bias, # bias = torch.repeat_interleave(self.bias,
# fwd_expert_count.to(self.bias.device), dim=0) # fwd_expert_count.to(self.bias.device), dim=0)
bias = [] bias = []
for i in range(self.num_expert): for i in range(self.num_expert):
if fwd_expert_count[i] > 0: if fwd_expert_count[i] > 0:
bias.append(self.bias[i].unsqueeze(0).expand(fwd_expert_count[i], -1)) bias.append(
self.bias[i].unsqueeze(0).expand(
fwd_expert_count[i], -1
)
)
bias = torch.cat(bias, dim=0) bias = torch.cat(bias, dim=0)
x = x + bias x = x + bias
return x return x
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment