OpenDAS / Megatron-LM

Commit 16c90445
Authored Sep 02, 2021 by slym

    minor changes

Parent: cf7efd4f
Showing 3 changed files with 9 additions and 11 deletions:

    megatron/arguments.py    +2 -4
    megatron/initialize.py   +1 -1
    megatron/mpu/layers.py   +6 -6
megatron/arguments.py

@@ -462,10 +462,8 @@ def _add_training_args(parser):
     group.add_argument('--dataloader-type', type=str, default=None,
                        choices=['single', 'cyclic'],
                        help='Single pass vs multiple pass data loader')
-    group.add_argument('--async-tensor-parallel-allreduce',
-                       action='store_true',
-                       help='Enable asynchronous excution of tensor-parallel allreduce '
-                       'with other GPU operators', dest='async_tensor_parallel_allreduce')
+    group.add_argument('--async-tensor-model-parallel-allreduce',
+                       action='store_true')
     return parser
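One side effect of the rename is that the explicit dest= is no longer needed: argparse derives the attribute name from the option string itself, stripping the leading dashes and converting the remaining dashes to underscores, so the flag still lands in args.async_tensor_model_parallel_allreduce. A standalone illustration of that behavior (not part of this commit):

import argparse

# With action='store_true' and no explicit dest=, argparse maps
# '--async-tensor-model-parallel-allreduce' to the attribute
# 'async_tensor_model_parallel_allreduce', defaulting to False.
parser = argparse.ArgumentParser()
group = parser.add_argument_group(title='training')
group.add_argument('--async-tensor-model-parallel-allreduce',
                   action='store_true')

args = parser.parse_args(['--async-tensor-model-parallel-allreduce'])
print(args.async_tensor_model_parallel_allreduce)  # True
args = parser.parse_args([])
print(args.async_tensor_model_parallel_allreduce)  # False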
megatron/initialize.py

@@ -177,7 +177,7 @@ def _initialize_distributed():
             args.local_rank = device
         torch.cuda.set_device(device)

     # Increase cuda stream priority of NCCL ops when overlapping with other ops
-    if (args.async_tensor_parallel_allreduce and
+    if (args.async_tensor_model_parallel_allreduce and
         args.tensor_model_parallel_size > 1):
         from torch._C._distributed_c10d import ProcessGroupNCCL
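For context on the import: PyTorch allows an NCCL process group to be created with a high-priority CUDA stream, which is what the comment about raising the priority of NCCL ops refers to. A hedged sketch of that pattern, not the code in this commit; it assumes ProcessGroupNCCL.Options exposes an is_high_priority_stream field and that torch.distributed.new_group accepts a pg_options argument, as in PyTorch 1.9+:

import torch.distributed as dist
from torch._C._distributed_c10d import ProcessGroupNCCL

def new_high_priority_nccl_group(ranks):
    # Collectives issued on this group run on a higher-priority CUDA stream,
    # so they are scheduled ahead of ordinary compute kernels when overlapped.
    options = ProcessGroupNCCL.Options()
    options.is_high_priority_stream = True
    return dist.new_group(ranks, backend='nccl', pg_options=options)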
megatron/mpu/layers.py

@@ -199,7 +199,7 @@ class VocabParallelEmbedding(torch.nn.Module):
         return output


-class ColumnParallelLinearFunction(torch.autograd.Function):
+class ColumnParallelLinearWithAsyncAllreduce(torch.autograd.Function):
     """
     Column-parallel linear layer execution with asynchronous all-reduce
     execution in backprop.

@@ -304,19 +304,19 @@ class ColumnParallelLinear(torch.nn.Module):
                 self.bias.zero_()
         else:
             self.register_parameter('bias', None)
-        self.async_tensor_parallel_allreduce = (args.async_tensor_parallel_allreduce
-                                                and world_size > 1)
+        self.async_tensor_model_parallel_allreduce = (
+                args.async_tensor_model_parallel_allreduce and world_size > 1)

     def forward(self, input_):
         bias = self.bias if not self.skip_bias_add else None
-        if self.async_tensor_parallel_allreduce:
+        if self.async_tensor_model_parallel_allreduce:
             input_shape = input_.shape
             input_ = input_.view(input_shape[0] * input_shape[1], input_shape[2])
-            # Maxtrix multiply with asynchronouse tensor-parallel all-reduce execution
-            output_parallel = ColumnParallelLinearFunction.apply(
+            # Maxtrix multiply with asynchronouse all-reduce execution
+            output_parallel = ColumnParallelLinearWithAsyncAllreduce.apply(
                     input_, self.weight, bias, bias is not None)
             output_parallel = output_parallel.view(
                     input_shape[0], input_shape[1], output_parallel.shape[1])
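The renamed ColumnParallelLinearWithAsyncAllreduce only appears here at its call site. The idea behind it is that the all-reduce of the input gradient over the tensor-model-parallel group is launched asynchronously in backward, so it overlaps with the weight- and bias-gradient GEMMs. A simplified sketch of that technique (the class name, group handling, and guard below are illustrative, not the repository's exact implementation):

import torch
import torch.distributed as dist

class AsyncAllreduceLinearSketch(torch.autograd.Function):
    """Simplified stand-in for ColumnParallelLinearWithAsyncAllreduce."""

    @staticmethod
    def forward(ctx, input_, weight, bias, use_bias):
        ctx.save_for_backward(input_, weight)
        ctx.use_bias = use_bias
        # Plain GEMM on this rank's output shard: [s*b, h] x [h, o/p] -> [s*b, o/p]
        output = torch.matmul(input_, weight.t())
        if use_bias:
            output = output + bias
        return output

    @staticmethod
    def backward(ctx, grad_output):
        input_, weight = ctx.saved_tensors
        grad_input = grad_output.matmul(weight)
        handle = None
        if dist.is_available() and dist.is_initialized() and dist.get_world_size() > 1:
            # Start the all-reduce of grad_input without blocking; the real code
            # reduces over the tensor-model-parallel group rather than the default one.
            handle = dist.all_reduce(grad_input, async_op=True)
        # Overlap: these GEMMs run while the all-reduce is in flight.
        grad_weight = grad_output.t().matmul(input_)
        grad_bias = grad_output.sum(dim=0) if ctx.use_bias else None
        if handle is not None:
            handle.wait()
        return grad_input, grad_weight, grad_bias, None

The view() calls in the diff above serve the same purpose as the 2-D shapes assumed here: the [sequence, batch, hidden] activation is flattened before the apply() call and reshaped back afterwards.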