Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
OpenDAS
Megatron-LM
Commits
804ed2e6
Commit
804ed2e6
authored
Jan 24, 2022
by
Lawrence McAfee
Browse files
working with interleaving
parent
a06af061
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
24 additions
and
7 deletions
+24
-7
megatron/arguments.py
megatron/arguments.py
+1
-1
megatron/mpu/initialize.py
megatron/mpu/initialize.py
+3
-3
megatron/schedules.py
megatron/schedules.py
+20
-3
No files found.
megatron/arguments.py
View file @
804ed2e6
...
@@ -147,7 +147,7 @@ def parse_args(extra_args_provider=None, defaults={},
...
@@ -147,7 +147,7 @@ def parse_args(extra_args_provider=None, defaults={},
# args.num_layers_per_virtual_pipeline_stage
# args.num_layers_per_virtual_pipeline_stage
# <<<
# <<<
args
.
virtual_pipeline_model_parallel_size
=
\
args
.
virtual_pipeline_model_parallel_size
=
\
(
args
.
num_layers
//
transformer_pipeline_size
)
//
\
(
args
.
num_layers
//
args
.
transformer_pipeline_
model_parallel_
size
)
//
\
args
.
num_layers_per_virtual_pipeline_stage
args
.
num_layers_per_virtual_pipeline_stage
# >>>
# >>>
# from lutil import pax
# from lutil import pax
...
...
megatron/mpu/initialize.py
View file @
804ed2e6
...
@@ -350,13 +350,13 @@ def get_num_layers(args, is_encoder_and_decoder_model):
...
@@ -350,13 +350,13 @@ def get_num_layers(args, is_encoder_and_decoder_model):
# get_pipeline_model_parallel_world_size()
# get_pipeline_model_parallel_world_size()
# )
# )
# <<<
# <<<
assert
args
.
num_layers
%
transformer_pipeline_size
==
0
,
\
assert
args
.
num_layers
%
args
.
transformer_pipeline_
model_parallel_
size
==
0
,
\
'num_layers must be divisible by transformer_pipeline_size'
'num_layers must be divisible by transformer_pipeline_
model_parallel_
size'
num_layers
=
(
num_layers
=
(
0
0
if
args
.
standalone_embed_stage
if
args
.
standalone_embed_stage
and
get_pipeline_model_parallel_rank
()
==
0
else
and
get_pipeline_model_parallel_rank
()
==
0
else
args
.
num_layers
//
transformer_pipeline_size
args
.
num_layers
//
args
.
transformer_pipeline_
model_parallel_
size
)
)
else
:
else
:
num_layers
=
args
.
num_layers
num_layers
=
args
.
num_layers
...
...
megatron/schedules.py
View file @
804ed2e6
...
@@ -40,9 +40,26 @@ def get_forward_backward_func():
...
@@ -40,9 +40,26 @@ def get_forward_backward_func():
# "pipeline size" : args.pipeline_model_parallel_size,
# "pipeline size" : args.pipeline_model_parallel_size,
# })
# })
# <<<
# <<<
assert
get_num_microbatches
()
%
args
.
pipeline_model_parallel_size
==
0
,
\
# >>>
'number of microbatches is not divisible by pipeline-parallel '
\
# assert get_num_microbatches() % args.pipeline_model_parallel_size == 0, \
'size when using interleaved schedule'
# 'number of microbatches is not divisible by pipeline-parallel ' \
# 'size when using interleaved schedule'
# assert get_num_microbatches() % \
# args.transformer_pipeline_model_parallel_size == 0, \
# 'number of microbatches (%d) is not divisible by transformer-' \
# 'pipeline-model-parallel-size (%d) when using interleaved ' \
# 'schedule' % (
# get_num_microbatches(),
# args.transformer_pipeline_model_parallel_size,
# )
assert
get_num_microbatches
()
%
\
args
.
pipeline_model_parallel_size
==
0
,
\
'number of microbatches (%d) is not divisible by pipeline-'
\
'model-parallel-size (%d) when using interleaved schedule'
%
(
get_num_microbatches
(),
args
.
pipeline_model_parallel_size
,
)
# <<<
else
:
else
:
forward_backward_func
=
forward_backward_pipelining_without_interleaving
forward_backward_func
=
forward_backward_pipelining_without_interleaving
else
:
else
:
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment