OpenDAS / Megatron-LM · Commits

Commit 3f652469 authored Sep 02, 2021 by slym

    reflect feedback

parent 16c90445

Showing 3 changed files with 8 additions and 4 deletions:

    megatron/arguments.py     +5  -2
    megatron/initialize.py    +1  -1
    megatron/mpu/layers.py    +2  -1
megatron/arguments.py  View file @ 3f652469

@@ -462,8 +462,11 @@ def _add_training_args(parser):
     group.add_argument('--dataloader-type', type=str, default=None,
                        choices=['single', 'cyclic'],
                        help='Single pass vs multiple pass data loader')
-    group.add_argument('--async-tensor-model-parallel-allreduce',
-                       action='store_true')
+    group.add_argument('--no-async-tensor-model-parallel-allreduce',
+                       action='store_true',
+                       help='Disable asynchronous execution of '
+                            'tensor-model-parallel all-reduce with weight '
+                            'gradient computation of a column-linear layer.')
     return parser
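Because the renamed option is a negated 'store_true' flag, the asynchronous all-reduce path is now enabled by default and only turned off when the flag is passed. A minimal, self-contained argparse sketch of that behavior (a standalone toy parser for illustration, not megatron/arguments.py itself):

import argparse

# Toy parser mirroring only the renamed option; everything else is omitted.
parser = argparse.ArgumentParser()
parser.add_argument('--no-async-tensor-model-parallel-allreduce',
                    action='store_true',
                    help='Disable asynchronous tensor-model-parallel all-reduce.')

# Default launch: flag absent, so the attribute is False and async stays enabled.
args = parser.parse_args([])
assert args.no_async_tensor_model_parallel_allreduce is False

# Opt-out launch: passing the flag disables the async path.
args = parser.parse_args(['--no-async-tensor-model-parallel-allreduce'])
assert args.no_async_tensor_model_parallel_allreduce is True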
megatron/initialize.py  View file @ 3f652469

@@ -177,7 +177,7 @@ def _initialize_distributed():
             args.local_rank = device
         torch.cuda.set_device(device)
     # Increase cuda stream priority of NCCL ops when overlapping with other ops
-    if (args.async_tensor_model_parallel_allreduce and
+    if (not args.no_async_tensor_model_parallel_allreduce and
         args.tensor_model_parallel_size > 1):
         from torch._C._distributed_c10d import ProcessGroupNCCL
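For context, the imported ProcessGroupNCCL is presumably what the code just below this hunk (not shown in the diff) uses to request a higher-priority CUDA stream for the tensor-model-parallel group. A hedged sketch of that pattern, assuming a PyTorch build that exposes is_high_priority_stream on the NCCL options object:

# Illustrative only; the exact continuation is outside this hunk.
from torch._C._distributed_c10d import ProcessGroupNCCL

nccl_options = ProcessGroupNCCL.Options()
nccl_options.is_high_priority_stream = True  # run NCCL ops on a higher-priority CUDA stream
# The options object would then be supplied when creating the tensor-model-parallel process group.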
megatron/mpu/layers.py  View file @ 3f652469

@@ -305,7 +305,8 @@ class ColumnParallelLinear(torch.nn.Module):
         else:
             self.register_parameter('bias', None)

         self.async_tensor_model_parallel_allreduce = (
-            args.async_tensor_model_parallel_allreduce and world_size > 1)
+            not args.no_async_tensor_model_parallel_allreduce and
+            world_size > 1)
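The layer-side change keeps the same guard as before, just phrased through the negated flag: async all-reduce is recorded as active only when it has not been disabled and the tensor-model-parallel world size is greater than one. A small, self-contained illustration of how that boolean resolves (argparse.Namespace standing in for Megatron's parsed args):

from argparse import Namespace

def resolve_async_allreduce(args, world_size):
    # Mirrors the expression stored on the layer in the hunk above.
    return (not args.no_async_tensor_model_parallel_allreduce
            and world_size > 1)

print(resolve_async_allreduce(Namespace(no_async_tensor_model_parallel_allreduce=False), 8))  # True: default with TP > 1
print(resolve_async_allreduce(Namespace(no_async_tensor_model_parallel_allreduce=False), 1))  # False: no tensor parallelism
print(resolve_async_allreduce(Namespace(no_async_tensor_model_parallel_allreduce=True), 8))   # False: explicitly disabled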