OpenDAS / FastMoE · Commits · b56c8043
"vscode:/vscode.git/clone" did not exist on "2a8339dbc853ef85616c43a716e921526531679e"
Unverified commit b56c8043, authored Feb 25, 2021 by Rick Ho, committed by GitHub on Feb 25, 2021

Merge pull request #7 from laekov/reproducibility

Reproducibility

Parents: 03b2a725, b44de4cd
Changes: 2 changed files with 18 additions and 4 deletions (+18, -4)

examples/transformer-xl/mem_transformer.py (+2, -2)
fmoe/layers.py (+16, -2)
examples/transformer-xl/mem_transformer.py

@@ -380,8 +380,8 @@ from fmoe import FMoETransformerMLP
 class CustomizedMoEPositionwiseFF(FMoETransformerMLP):
     def __init__(self, d_model, d_inner, dropout, pre_lnorm=False, moe_num_expert=64, moe_top_k=2):
         activation = nn.Sequential(
-            nn.Dropout(dropout),
-            nn.ReLU()
+            nn.ReLU(),
+            nn.Dropout(dropout)
         )
         super().__init__(num_expert=moe_num_expert, d_model=d_model, d_hidden=d_inner,
                          top_k=moe_top_k, do_lnorm=True, pre_lnorm=pre_lnorm, activation=activation, dropout=dropout)
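The first change reorders the expert activation from Dropout then ReLU to ReLU then Dropout, matching the Linear → ReLU → Dropout ordering of the baseline Transformer-XL position-wise FFN. For ReLU specifically, the two orderings produce identical values whenever the same dropout mask is drawn, since ReLU is positively homogeneous; the swap presumably matters for reproducing the baseline exactly (including where in the forward pass dropout masks are consumed), not for changing the math of a single layer. A minimal standalone check of that equivalence (my own illustration, not part of the commit):

import torch

# Illustration only: with the same inverted-dropout mask m / (1 - p),
# relu(x * m / (1 - p)) == relu(x) * m / (1 - p), because the mask is
# non-negative and relu(a * x) == a * relu(x) for any scalar a >= 0.
p = 0.1
x = torch.randn(4, 8)
mask = (torch.rand_like(x) > p).float() / (1 - p)

dropout_then_relu = torch.relu(x * mask)  # old ordering
relu_then_dropout = torch.relu(x) * mask  # new ordering

print(torch.allclose(dropout_then_relu, relu_then_dropout))  # True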
fmoe/layers.py

@@ -61,8 +61,22 @@ class FMoELinear(nn.Module):
         '''
         x = MOELinear.apply(inp, self.weight, fwd_expert_count)
         if self.bias is not None:
-            bias = torch.repeat_interleave(self.bias,
-                    fwd_expert_count.to(self.bias.device), dim=0)
+            # TODO: torch.repeat_interleave seems have wrong
+            # behaviors in backward, leading to incorrect
+            # gradient computation for bias.
+            # Thus we use a for-loop to manually expand the bias.
+            # This part should finally goes to MOELinear.apply.
+            # bias = torch.repeat_interleave(self.bias,
+            #         fwd_expert_count.to(self.bias.device), dim=0)
+            bias = []
+            for i in range(self.num_expert):
+                if fwd_expert_count[i] > 0:
+                    bias.append(
+                        self.bias[i].unsqueeze(0).expand(
+                            fwd_expert_count[i], -1
+                        )
+                    )
+            bias = torch.cat(bias, dim=0)
             x = x + bias
         return x
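The second change replaces torch.repeat_interleave with an explicit per-expert loop when expanding the bias, because (per the TODO in the diff) repeat_interleave was suspected of computing incorrect gradients for the bias. In the forward direction the two constructions agree: expert i's bias row is repeated fwd_expert_count[i] times, once per token routed to that expert. A standalone sketch of that forward equivalence, with made-up expert counts and sizes (not from the commit):

import torch

# Illustration only: the loop-based expansion used in FMoELinear yields the
# same forward result as torch.repeat_interleave.
num_expert, d_out = 4, 8
bias = torch.randn(num_expert, d_out)          # one bias row per expert
fwd_expert_count = torch.tensor([3, 0, 2, 1])  # tokens routed to each expert

reference = torch.repeat_interleave(bias, fwd_expert_count, dim=0)

rows = []
for i in range(num_expert):
    if fwd_expert_count[i] > 0:
        # expand() makes a (count, d_out) view of one bias row without copying
        rows.append(bias[i].unsqueeze(0).expand(int(fwd_expert_count[i]), -1))
expanded = torch.cat(rows, dim=0)

print(torch.equal(reference, expanded))  # True: identical (6, 8) tensors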