Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
OpenDAS
FastMoE
Commits
34477955
Commit
34477955
authored
Feb 08, 2021
by
Sengxian
Browse files
Add DataParallel test for FMoE
parent
40841453
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
38 additions
and
24 deletions
+38
-24
tests/test_dp.py
tests/test_dp.py
+38
-24
No files found.
tests/test_dp.py
View file @
34477955
from
moe
import
FMoE
as
MOELayer
from
moe
import
BruteForceMoE
as
MOELayer_raw
import
torch
from
torch
import
nn
import
sys
import
os
import
pytest
import
torch
# Number of devices that DataParallel will be spread over, read once at
# import time from the N_GPUS environment variable (defaults to 2 when unset).
n_devices = int(os.getenv('N_GPUS', '2'))
def test_dp():
    """Data-parallel smoke test entry point.

    NOTE(review): as rendered in this diff extraction the function body
    consists only of the fmoe imports and a local ``n_devices`` that shadows
    the module-level constant -- the remainder of its body appears to have
    been interleaved with ``test_fmoe_dp`` by the diff renderer; confirm
    against the original file in the repository.
    """
    # Local imports keep collection working on machines without fmoe installed.
    from fmoe.gates import NaiveGate
    from fmoe.layers import FMoE
    from fmoe.transformer import _Expert
    # Shadows the module-level n_devices; same N_GPUS lookup, default "2".
    n_devices = int(os.environ.get("N_GPUS", "2"))
@pytest.mark.parametrize("num_expert", [4, 8])
@pytest.mark.parametrize("top_k", [2, 3])
@pytest.mark.parametrize("batch_size", [4])
@pytest.mark.parametrize("d_model", [16])
@pytest.mark.parametrize("d_hidden", [32])
def test_fmoe_dp(
    num_expert,
    top_k,
    batch_size,
    d_model,
    d_hidden,
    activation=torch.nn.functional.gelu,
):
    """Run an FMoE layer (and the raw MOELayer baseline) under
    torch.nn.DataParallel across ``n_devices`` GPUs.

    NOTE(review): this body, as rendered by the diff extraction, interleaves
    the OLD ``test_dp`` body (the in_feat/out_feat/MOELayer section) with the
    NEW ``test_fmoe_dp`` body (the seeds/_Expert/FMoE section). The statement
    order below reproduces the extraction verbatim and should be reconciled
    against the actual post-commit file before being treated as runnable.
    """
    # Fixed seeds so expert initialisation and the random inputs below are
    # reproducible across runs.
    torch.manual_seed(42)
    torch.cuda.manual_seed(42)
    # NOTE(review): these assignments clobber the parametrized num_expert and
    # batch_size arguments -- almost certainly leftover lines from the old
    # test_dp body; confirm.
    batch_size = 6
    num_expert = 4
    in_feat = 2
    out_feat = 3
    inp = torch.rand(batch_size, in_feat).cuda()
    # One random expert index per sample; the gate tensor itself carries no
    # gradient (requires_grad=False).
    gate = torch.randint(
        low=0,
        high=num_expert,
        size=(batch_size,),
        requires_grad=False,
    ).cuda()
    # Expert bank used by the FMoE path further below.
    experts = _Expert(num_expert, d_model, d_hidden, activation).cuda()
    print("data parallel of our MoE model")
    # Baseline path: the raw MOELayer replicated by DataParallel.
    moe = MOELayer(num_expert, in_feat, out_feat).cuda()
    moe_dp = torch.nn.DataParallel(moe, device_ids=list(range(n_devices)))
    # A few forward passes to shake out replication / scatter issues.
    for i in range(5):
        output = moe_dp(inp, gate)
    print('Successful')

    def expert_fn(inp, gate):
        # Thin closure adapting the _Expert module to the expert_fn callback
        # signature that FMoE expects.
        return experts(inp, gate)

    # FMoE path: single-process (world_size=1, no model-parallel group),
    # wrapped in DataParallel the same way as the baseline above.
    moe = FMoE(
        num_expert=num_expert,
        d_model=d_model,
        gate=NaiveGate,
        world_size=1,
        mp_group=None,
        expert_fn=expert_fn,
        top_k=top_k,
    ).cuda()
    moe_dp = torch.nn.DataParallel(moe, device_ids=list(range(n_devices)))
# Script entry point: run the plain data-parallel smoke test directly.
if __name__ == '__main__':
    test_dp()
    # NOTE(review): the loop below references moe_dp, batch_size and d_model,
    # none of which exist at module scope -- in the original post-commit file
    # it almost certainly belongs at the end of test_fmoe_dp and was displaced
    # here by the diff extraction; confirm against the repository.
    for i in range(5):
        output = moe_dp(torch.rand(batch_size, d_model).cuda())
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment