Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
OpenDAS
FastMoE
Commits
c65039da
"vscode:/vscode.git/clone" did not exist on "8520f0bea4a09e60a217fe3a8cf24b8f733ec16c"
Commit
c65039da
authored
Feb 26, 2021
by
Jiezhong Qiu
Browse files
propose and discuss 3 solutions to expand bias
parent
63f6ebbf
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
26 additions
and
15 deletions
+26
-15
fmoe/layers.py
fmoe/layers.py
+26
-15
No files found.
fmoe/layers.py
View file @
c65039da
...
...
@@ -61,28 +61,39 @@ class FMoELinear(nn.Module):
'''
x
=
MOELinear
.
apply
(
inp
,
self
.
weight
,
fwd_expert_count
)
if
self
.
bias
is
not
None
:
# TODO: torch.repeat_interleave seems to behave
# incorrectly in backward, leading to incorrect
# gradient computation for bias.
# Thus we use a for-loop to manually expand the bias.
# This part should eventually move into MOELinear.apply.
# TODO: torch.repeat_interleave seems to be numerically
# unstable in backward, leading to incorrect
# gradient computation for solutions 1 and 2.
# Solution 3 uses a for-loop to expand the bias,
# but is 50% slower.
# This part should eventually move into MOELinear.apply,
# e.g. MOELinear.apply(x, weight, bias, count)
# Solution 1
# bias = torch.repeat_interleave(self.bias,
# fwd_expert_count.to(self.bias.device), dim=0)
bias
=
[]
for
i
in
range
(
self
.
num_expert
):
if
fwd_expert_count
[
i
]
>
0
:
bias
.
append
(
self
.
bias
[
i
].
unsqueeze
(
0
).
expand
(
fwd_expert_count
[
i
],
-
1
)
)
bias
=
torch
.
cat
(
bias
,
dim
=
0
)
# Solution 2
bias_idx
=
torch
.
arange
(
self
.
num_expert
)
\
.
repeat_interleave
(
fwd_expert_count
)
bias
=
self
.
bias
[
bias_idx
]
# Solution 3
# bias = []
# for i in range(self.num_expert):
# if fwd_expert_count[i] > 0:
# bias.append(
# self.bias[i].unsqueeze(0).expand(
# fwd_expert_count[i], -1
# )
# )
# bias = torch.cat(bias, dim=0)
x
=
x
+
bias
return
x
def extra_repr(self) -> str:
    """Return a one-line summary of this layer's configuration.

    The summary reports ``num_expert``, ``in_feat`` (labelled
    ``in_features``), ``out_feat`` (labelled ``out_features``), whether
    ``bias`` is set, and ``rank``, all read from the instance.

    Returns:
        The formatted summary string.
    """
    # Adjacent string literals are joined at compile time. A backslash
    # continuation inside a single literal would instead embed the next
    # source line's indentation into the returned text.
    return (
        'num_expert={}, in_features={}, '
        'out_features={}, bias={}, rank={}'
    ).format(
        self.num_expert,
        self.in_feat,
        self.out_feat,
        self.bias is not None,
        self.rank,
    )
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment