Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
OpenDAS
FastMoE
Commits
bb92d30e
Commit
bb92d30e
authored
Apr 27, 2021
by
Rick Ho
Browse files
limit by threshold cuda code
parent
450549a4
Changes
4
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
62 additions
and
3 deletions
+62
-3
cuda/fmoe_cuda.cpp
cuda/fmoe_cuda.cpp
+13
-3
cuda/tests/.gitignore
cuda/tests/.gitignore
+1
-0
cuda/tests/Makefile
cuda/tests/Makefile
+4
-0
cuda/tests/balancing.cu
cuda/tests/balancing.cu
+44
-0
No files found.
cuda/fmoe_cuda.cpp
View file @
bb92d30e
...
...
@@ -2,6 +2,7 @@
#include <vector>
#include <torch/extension.h>
// global_exchange
#ifdef FMOE_USE_NCCL
#include <c10d/ProcessGroupNCCL.hpp>
std
::
vector
<
torch
::
Tensor
>
_expert_exchange
(
...
...
@@ -20,6 +21,7 @@ std::vector<torch::Tensor> _global_gather(
// Ensure the NCCL communicator behind process group `p` is initialised
// for the device holding tensor `t`.
// NOTE(review): semantics inferred from the name only — the definition
// lives in the CUDA implementation; confirm there.
void _ensure_nccl(
        c10d::ProcessGroupNCCL& p,
        torch::Tensor t);
#endif // FMOE_USE_NCCL
// local_exchange
// Count how many entries of `gate` are routed to each of the
// `num_expert` local experts.
// NOTE(review): exact return tensors (counts, positions, ...) are
// defined in the .cu implementation — confirm the layout there.
std::vector<torch::Tensor> _expert_count(
        torch::Tensor gate,
        size_t num_expert);
...
...
@@ -30,16 +32,22 @@ std::vector<torch::Tensor> _local_gather(
torch
::
Tensor
output_buf
,
torch
::
Tensor
pos
);
// parallel_linear
// Forward pass of the per-expert linear layer: applies each expert's
// slice of `weight` to its slice of `input_buf`, with per-expert batch
// sizes given by `expert_count`.
// NOTE(review): shapes assumed from the backward declaration's
// comments ([batch_size x feat], [num_expert x out_feat x in_feat]) —
// confirm against the .cu implementation.
std::vector<torch::Tensor> _linear_forward(
        torch::Tensor input_buf,
        torch::Tensor weight,
        torch::Tensor expert_count);
// Backward pass of the per-expert linear layer: given the upstream
// gradient, the saved input and the expert weights, computes the
// gradients w.r.t. input and weight (per-expert batch sizes in
// `expert_count`).
//
// Fix: the scraped source carried a duplicated parameter list (the
// removed and added sides of the diff were flattened together), which
// is not valid C++ — reconstructed as a single declaration.
std::vector<torch::Tensor> _linear_backward(
        torch::Tensor grad_output_buf,  // [batch_size x out_feat]
        torch::Tensor input_buf,        // [batch_size x out_feat] -- NOTE(review): likely in_feat; confirm
        torch::Tensor weight,           // [num_expert x out_feat x in_feat]
        torch::Tensor expert_count);
// balancing
// Clamp per-expert sample counts so that no expert receives more than
// its capacity; returns the clamped counts.
//
// Fix: the declaration was terminated with `{` instead of `;`, which
// would turn everything that follows (the PYBIND11_MODULE) into an
// unterminated function body and fail to compile.
// NOTE(review): the test driver passes (n_expert, n_worker) to the
// underlying impl — consider renaming `n_experts` to `n_worker` for
// clarity; confirm against the .cu implementation.
std::vector<torch::Tensor> _limit_by_capacity(
        torch::Tensor expert_count,
        torch::Tensor capacity,
        long n_expert,
        long n_experts);
PYBIND11_MODULE
(
TORCH_EXTENSION_NAME
,
m
)
{
#ifdef FMOE_USE_NCCL
m
.
def
(
"expert_exchange"
,
&
_expert_exchange
,
"FastMoE expert exchange (CUDA)"
);
...
...
@@ -54,4 +62,6 @@ PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
m
.
def
(
"linear_forward"
,
&
_linear_forward
,
"FastMoE forward (CUDA)"
);
m
.
def
(
"linear_backward"
,
&
_linear_backward
,
"FastMoE backward (CUDA)"
);
m
.
def
(
"limit_by_capacity"
,
&
_limit_by_capacity
,
"FastMoE limit experts by capacity(CUDA)"
);
}
cuda/tests/.gitignore
0 → 100644
View file @
bb92d30e
test_*
cuda/tests/Makefile
0 → 100644
View file @
bb92d30e
# Standalone CUDA unit tests; `make` builds the balancing test.
default: test_balancing

# Pattern rule: test_<name> is built from <name>.cu plus the shared
# stream manager. Fix: the recipe already compiled
# ../stream_manager.cpp, but it was not listed as a prerequisite, so
# changes to it never triggered a rebuild.
test_%: %.cu ../stream_manager.cpp
	nvcc $< ../stream_manager.cpp -lcublas -o $@

.PHONY: default clean
clean:
	rm -f test_*
cuda/tests/balancing.cu
0 → 100644
View file @
bb92d30e
#include "../balancing.cuh"
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <cuda.h>
#include <cuda_runtime.h>
/*
 * Standalone smoke test for fmoe_cuda_limit_by_capacity_impl.
 *
 * Usage: test_balancing <n_worker> <n_expert> <capacity>
 *
 * Builds a local expert-count array lec[i] = i over n_worker * n_expert
 * experts, gives every local expert the same capacity, runs the
 * limit-by-capacity kernel, then prints the remaining capacity of
 * expert 0 and each (original, clamped) count pair for inspection.
 */

// Abort with a readable message if a CUDA runtime call failed.
// Fix: the original checked no CUDA return codes, so failures silently
// produced garbage output.
static void checkCuda(cudaError_t err, const char* what) {
    if (err != cudaSuccess) {
        fprintf(stderr, "CUDA error (%s): %s\n", what, cudaGetErrorString(err));
        exit(1);
    }
}

int main(int argc, char* args[]) {
    // Fix: the original read args[1..3] without validating argc — an
    // out-of-bounds read when arguments are missing.
    if (argc < 4) {
        fprintf(stderr, "usage: %s <n_worker> <n_expert> <capacity>\n", args[0]);
        return 1;
    }
    int n_worker = atoi(args[1]);
    int n_expert = atoi(args[2]);
    int cap_v = atoi(args[3]);
    int tot_expert = n_worker * n_expert;

    // Host-side local expert counts: lec[i] = i gives a spread of
    // values below and above the capacity threshold.
    long* lec = new long[tot_expert];
    for (int i = 0; i < tot_expert; ++i) {
        lec[i] = i;
    }
    long* g_lec;
    checkCuda(cudaMalloc(&g_lec, sizeof(long) * tot_expert), "malloc g_lec");
    checkCuda(cudaMemcpy(g_lec, lec, sizeof(long) * tot_expert,
            cudaMemcpyHostToDevice), "copy lec");

    // Every local expert starts with the same capacity cap_v.
    int* cap = new int[n_expert];
    for (int i = 0; i < n_expert; ++i) {
        cap[i] = cap_v;
    }
    int* g_cap;
    checkCuda(cudaMalloc(&g_cap, sizeof(int) * n_expert), "malloc g_cap");
    checkCuda(cudaMemcpy(g_cap, cap, sizeof(int) * n_expert,
            cudaMemcpyHostToDevice), "copy cap");

    // Output buffer: expert counts after clamping by capacity.
    long* eca = new long[tot_expert];
    long* g_eca;
    checkCuda(cudaMalloc(&g_eca, sizeof(long) * tot_expert), "malloc g_eca");

    auto smgr = getCudaStreamManager(0);
    fmoe_cuda_limit_by_capacity_impl(g_lec, g_cap, g_eca,
            n_expert, n_worker, smgr);
    // Fix: surface launch-configuration errors instead of reading back
    // garbage. NOTE(review): assumes the impl runs on (or syncs with)
    // the default stream so the blocking copies below see its results —
    // confirm against the stream manager.
    checkCuda(cudaGetLastError(), "limit_by_capacity launch");

    checkCuda(cudaMemcpy(cap, g_cap, sizeof(int) * n_expert,
            cudaMemcpyDeviceToHost), "copy back cap");
    checkCuda(cudaMemcpy(eca, g_eca, sizeof(long) * tot_expert,
            cudaMemcpyDeviceToHost), "copy back eca");

    printf("%d\n", cap[0]);
    for (int i = 0; i < tot_expert; ++i) {
        printf("%ld %ld\n", lec[i], eca[i]);
    }

    // Fix: the original leaked every host and device allocation.
    checkCuda(cudaFree(g_lec), "free g_lec");
    checkCuda(cudaFree(g_cap), "free g_cap");
    checkCuda(cudaFree(g_eca), "free g_eca");
    delete[] lec;
    delete[] cap;
    delete[] eca;
    return 0;
}
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment