OpenDAS / FastMoE · Commit 27af1828

make cudaStreamManager thread local

Authored Jan 03, 2021 by Jiezhong Qiu
Parent: f5cc759c
Showing 3 changed files with 18 additions and 7 deletions (+18 -7):

pytorch/cuda/cuda_stream_manager.cpp  +2 -1
pytorch/cuda/cuda_stream_manager.h    +9 -1
pytorch/cuda/moe.py                   +7 -5
pytorch/cuda/cuda_stream_manager.cpp

 #include <cassert>
+#include <thread>
 #include "cuda_stream_manager.h"

-CudaStreamManager* smgr = NULL;
+thread_local CudaStreamManager* smgr = NULL;

 CudaStreamManager* getCudaStreamManager(const size_t num_expert, const int device) {
     if (!smgr) {
 ...
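The substance of this file's change is the thread_local specifier on the global smgr pointer: each host thread that calls getCudaStreamManager now lazily creates and caches its own manager instead of sharing one process-wide instance, which matters when a framework such as torch.nn.DataParallel drives the module with one worker thread per GPU. Below is a minimal standalone sketch of the same lazy per-thread singleton pattern; Manager and getManager are hypothetical stand-ins for CudaStreamManager and getCudaStreamManager, not code from the repository.

#include <cstdio>
#include <thread>

// Hypothetical stand-in for CudaStreamManager: holds per-device state.
struct Manager {
    int device;
    explicit Manager(int d) : device(d) {
        std::printf("constructing manager for device %d\n", d);
    }
};

// thread_local: every host thread gets its own copy of this pointer,
// so each thread lazily creates and then reuses its own Manager.
thread_local Manager* mgr = nullptr;

Manager* getManager(int device) {
    if (!mgr) {                // first call on this thread
        mgr = new Manager(device);
    }
    return mgr;                // later calls on the same thread reuse it
}

int main() {
    std::thread t0([] { getManager(0); getManager(0); });  // builds one Manager
    std::thread t1([] { getManager(1); });                 // builds a second one
    t0.join();
    t1.join();
    return 0;
}

With a plain (non-thread_local) global, the two threads above would race to initialize a single shared pointer, and the thread working on the second GPU could silently reuse a manager created for the first one.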
pytorch/cuda/cuda_stream_manager.h

@@ -11,8 +11,16 @@
 class CudaStreamManager {
 public:
     CudaStreamManager(const size_t num_expert_, const int device_): num_expert(num_expert_), device(device_) {
+        /*
+        Actually, we will see current_device == device,
+        which means pytorch always sets the correct device for us.
+        But for safety, we still manually set device to the desired one.
+        */
+        int current_device;
+        checkCudaErrors(cudaGetDevice(&current_device));
+        printf("CudaStreamManager construnctor called, get device %d, set device %d\n", current_device, device);
         checkCudaErrors(cudaSetDevice(device));
+        printf("set device %d\n", device);
         streams = new cudaStream_t[num_expert];
         checkCudaErrors(cublasCreate(&handle));
         for (size_t i = 0; i < num_expert; ++i) {
 ...
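The added constructor lines query which device CUDA currently reports for the calling thread before explicitly binding to the requested one, and print both values so the "PyTorch already set the correct device" assumption can be checked at runtime. The following is a minimal sketch of that query-then-bind step, assuming a simple CHECK_CUDA macro in place of the checkCudaErrors helper the repository uses; it is an illustration, not the project's code.

#include <cstdio>
#include <cuda_runtime.h>

// Simplified stand-in for the checkCudaErrors helper used in the commit.
#define CHECK_CUDA(call)                                                    \
    do {                                                                    \
        cudaError_t err_ = (call);                                          \
        if (err_ != cudaSuccess) {                                          \
            std::fprintf(stderr, "CUDA error: %s (%s:%d)\n",                \
                         cudaGetErrorString(err_), __FILE__, __LINE__);     \
        }                                                                   \
    } while (0)

// Report the device the calling thread is currently bound to, then bind
// it to the requested one, mirroring the constructor logic above.
void bind_device(int device) {
    int current_device = -1;
    CHECK_CUDA(cudaGetDevice(&current_device));
    std::printf("got device %d, setting device %d\n", current_device, device);
    CHECK_CUDA(cudaSetDevice(device));
}

int main() {
    bind_device(0);  // hypothetical usage: pin the calling thread to GPU 0
    return 0;
}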
pytorch/cuda/moe.py

@@ -115,7 +115,7 @@ def test():
 def test_dp():
     torch.manual_seed(42)
     torch.cuda.manual_seed(42)
-    batch_size = 4
+    batch_size = 6
     num_expert = 4
     in_feat = 2
     out_feat = 3
@@ -125,14 +125,16 @@ def test_dp():
     print("data parallel of a nn.Linear model")
     linear = nn.Linear(in_feat, in_feat).cuda()
-    moe_linear = torch.nn.DataParallel(linear, device_ids=[0, 1])
-    output = moe_linear(inp)
+    linear_dp = torch.nn.DataParallel(linear, device_ids=[0, 1, 2])
+    output = linear_dp(inp)
     print("successful!")
     print("data parallel of our MoE model")
     moe = MOELayer(num_expert, in_feat, out_feat).cuda()
-    moe_dp = torch.nn.DataParallel(moe, device_ids=[0, 1])
-    output = moe_dp(inp, gate)
+    moe_dp = torch.nn.DataParallel(moe, device_ids=[0, 1, 2])
+    for i in range(5):
+        print(i, "forward")
+        output = moe_dp(inp, gate)
 ...