OpenDAS / Megatron-LM · Commits · bdd47d64

Commit bdd47d64, authored Jan 25, 2021 by Jared Casper
Parent: 78066ab0

    Address comments, fix argument bug.
Showing 3 changed files with 7 additions and 8 deletions:

    megatron/arguments.py         +1 -1
    megatron/mpu/layers.py        +1 -3
    tools/merge_mp_partitions.py  +5 -4
megatron/arguments.py

```diff
@@ -503,7 +503,7 @@ def _add_distributed_args(parser):
                        ' and returns function to complete it instead.'
                        'Also turns on --use-cpu-initialization flag.'
                        'This is for external DDP manager.')
-    group.add_argument('--use-cpu-initialization', type=bool, required=False,
+    group.add_argument('--use-cpu-initialization', action='store_true', default=None,
                        help='If set, affine parallel weights initialization uses CPU')
     return parser

```
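The "argument bug" named in the commit message is the `type=bool` form: argparse applies `bool()` to the raw command-line string, and any non-empty string (including "False") is truthy, so the flag could never actually be turned off. Switching to `action='store_true'` with `default=None` makes the flag presence-based while keeping "not passed" distinguishable from "passed". A minimal standalone sketch of the difference (the parser and flag names here are illustrative, not Megatron's):

```python
import argparse

parser = argparse.ArgumentParser()
# Buggy form: argparse calls bool() on the raw string, and any
# non-empty string is truthy, so "--buggy False" still yields True.
parser.add_argument('--buggy', type=bool, required=False)
# Fixed form: store_true takes no value; the flag is None when absent
# and True when passed, which downstream code can distinguish.
parser.add_argument('--fixed', action='store_true', default=None)

args = parser.parse_args(['--buggy', 'False', '--fixed'])
print(args.buggy)  # True -- bool('False') is True, not False
print(args.fixed)  # True

args = parser.parse_args([])
print(args.buggy)  # None (not passed)
print(args.fixed)  # None (default=None preserves "unset")
```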
megatron/mpu/layers.py

```diff
@@ -260,9 +260,7 @@ class ColumnParallelLinear(torch.nn.Module):
                 self.output_size_per_partition,
                 device=torch.cuda.current_device(),
                 dtype=args.params_dtype))
-            self.bias.tensor_model_parallel = True
-            self.bias.partition_dim = 0
-            self.bias.partition_stride = stride
+            set_tensor_model_parallel_attributes(self.bias, True, 0, stride)
             # Always initialize bias to zero.
             with torch.no_grad():
                 self.bias.zero_()
```
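Here `set_tensor_model_parallel_attributes` replaces the three hand-set attributes with a single call, which also gives one place to guard against setting them twice. A rough sketch of what such a helper looks like, reconstructed from the deleted lines (the attribute tuple and the assertion are assumptions about Megatron's internals, not quoted from this diff):

```python
import torch

# Attributes the deleted lines used to set by hand on each parameter.
_MODEL_PARALLEL_ATTRIBUTES = ('tensor_model_parallel',
                              'partition_dim',
                              'partition_stride')

def set_tensor_model_parallel_attributes(tensor, is_parallel, dim, stride):
    # Refuse to silently overwrite attributes set elsewhere.
    for attribute in _MODEL_PARALLEL_ATTRIBUTES:
        assert not hasattr(tensor, attribute)
    tensor.tensor_model_parallel = is_parallel
    tensor.partition_dim = dim
    tensor.partition_stride = stride

# Usage mirroring the new line in the diff: the bias is partitioned
# along dimension 0 with the layer's stride (here 1).
bias = torch.nn.Parameter(torch.empty(16))
set_tensor_model_parallel_attributes(bias, True, 0, 1)
assert bias.tensor_model_parallel and bias.partition_dim == 0
```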
tools/merge_mp_partitions.py

```diff
@@ -199,15 +199,16 @@ def main():
                                        'no_load_rng': True,
                                        'save_interval': 1})
     args = get_args()
-    model_type = args.model_type
-    orig_tensor_model_parallel_size = args.tensor_model_parallel_size
-    args.tensor_model_parallel_size = 1
-    tokenizer = rebuild_tokenizer(args)
 
     if args.pipeline_model_parallel_size > 1:
         print("Checkpoints with pipeline model parallelism are not currently supported.")
         exit()
 
+    model_type = args.model_type
+    orig_tensor_model_parallel_size = args.tensor_model_parallel_size
+    args.tensor_model_parallel_size = 1
+    tokenizer = rebuild_tokenizer(args)
+
     print('\n merging model parallel partitions ...')
     print(' > number of partitions: {}'.format(orig_tensor_model_parallel_size))
     print(' > checkpoint path: {}'.format(args.load))
```
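The reordering follows the usual guard-first pattern: the unsupported-configuration check now runs before the script records and overwrites `args.tensor_model_parallel_size`, so an early exit leaves the parsed arguments untouched. A toy sketch of the resulting control flow (`merge` and the `SimpleNamespace` stand in for the script's real `main()` and Megatron's global args):

```python
from types import SimpleNamespace

def merge(args):
    # Guard first: bail out on unsupported configurations before
    # mutating any of the argument fields.
    if args.pipeline_model_parallel_size > 1:
        print("Checkpoints with pipeline model parallelism are not "
              "currently supported.")
        return

    # Only now record and rewrite the parallelism settings; the merged
    # checkpoint is written as a single tensor-parallel partition.
    orig_tensor_model_parallel_size = args.tensor_model_parallel_size
    args.tensor_model_parallel_size = 1
    print(' > number of partitions: {}'.format(
        orig_tensor_model_parallel_size))

merge(SimpleNamespace(pipeline_model_parallel_size=1,
                      tensor_model_parallel_size=4))
```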