OpenDAS / Megatron-LM · Commits

Commit cb6f96b6
authored Feb 15, 2022 by Lawrence McAfee

wip; switching to grad-buffer-centric design

parent a3f3c3ad
Showing 4 changed files with 351 additions and 193 deletions:

megatron/arguments.py              +5    -3
megatron/optimizer/__init__.py     +7    -6
megatron/optimizer/optimizer.py    +334  -183
megatron/training.py               +5    -1
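The commit message alone does not spell out the new design, and the main body of the change (megatron/optimizer/optimizer.py) is collapsed in the view below. As a rough, hypothetical illustration of what a grad-buffer-centric layout usually means in Megatron-style training: gradients accumulate into one contiguous buffer per dtype, each parameter holds a main_grad view into that buffer, and reductions and optimizer steps then walk flat buffers instead of per-parameter .grad tensors. The GradBuffer class below is invented for this sketch and is not the code added by the commit.

    import torch

    class GradBuffer:
        """Hypothetical sketch of a contiguous gradient buffer (not this commit's class)."""

        def __init__(self, params, dtype=torch.float32):
            numel = sum(p.numel() for p in params)
            # One flat tensor holds all gradients for this dtype.
            self.data = torch.zeros(numel, dtype=dtype, device=params[0].device)
            # Give each parameter a view into the buffer; backward hooks (or the DDP
            # wrapper) accumulate into `main_grad` instead of `.grad`.
            offset = 0
            for p in params:
                p.main_grad = self.data[offset:offset + p.numel()].view_as(p)
                offset += p.numel()

        def zero(self):
            self.data.zero_()

    # Usage idea: a single all-reduce over the flat buffer replaces per-parameter reduces.
    #   buf = GradBuffer(list(model.parameters()))
    #   torch.distributed.all_reduce(buf.data)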
megatron/arguments.py

@@ -130,9 +130,11 @@ def parse_args(extra_args_provider=None, defaults={},
                 args.global_batch_size), flush=True)
     assert args.global_batch_size > 0
     if args.num_layers_per_virtual_pipeline_stage is not None:
-        assert args.pipeline_model_parallel_size > 2, \
-            'pipeline-model-parallel size should be greater than 2 with ' \
-            'interleaved schedule'
+        # >>> [ temporarily turning off ]
+        # assert args.pipeline_model_parallel_size > 2, \
+        #     'pipeline-model-parallel size should be greater than 2 with ' \
+        #     'interleaved schedule'
+        # <<<
         assert args.num_layers % args.num_layers_per_virtual_pipeline_stage == 0, \
             'number of layers is not divisible by number of layers per virtual ' \
             'pipeline stage'
megatron/optimizer/__init__.py

@@ -97,11 +97,11 @@ def get_megatron_optimizer(model,
     # from lutil import pax
     # pax(0, {
     #     "model" : model,
-    #     "param_groups" : param_groups,
-    #     "param_groups / 0" : param_groups[0],
-    #     "param_groups / 0 / params" : param_groups[0]["params"],
-    #     "param_groups / 1" : param_groups[1],
-    #     "param_groups / 1 / params" : param_groups[1]["params"],
+    #     # "param_groups" : param_groups,
+    #     # "param_groups / 0" : param_groups[0],
+    #     # "param_groups / 0 / params" : param_groups[0]["params"],
+    #     # "param_groups / 1" : param_groups[1],
+    #     # "param_groups / 1 / params" : param_groups[1]["params"],
     # })
     # <<<

@@ -164,7 +164,8 @@ def get_megatron_optimizer(model,
                                                  params_have_main_grad,
                                                  args.use_contiguous_buffers_in_local_ddp,
                                                  args.bf16,
-                                                 grad_scaler)
+                                                 grad_scaler,
+                                                 model)
     # <<<

     # FP32.
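The second hunk above appends model to the argument list of the fp16/bf16 optimizer constructor (after grad_scaler), which suggests the optimizer is being handed the DDP-wrapped modules so it can reach their contiguous gradient buffers directly. Below is a minimal sketch of what such a constructor might do with that handle, assuming the local DDP wrapper exposes its buffers through a _grad_buffers mapping as megatron/model/distributed.py does when --use-contiguous-buffers-in-local-ddp is enabled; the class and attribute usage here are illustrative, not the code this commit adds.

    class Float16OptimizerSketch:
        """Illustrative only: an optimizer wrapper that keeps a handle to grad buffers."""

        def __init__(self, optimizer, grad_scaler, model):
            self.optimizer = optimizer
            self.grad_scaler = grad_scaler
            # `model` may be a single module or a list of virtual-pipeline chunks.
            modules = model if isinstance(model, list) else [model]
            # Collect each wrapper's contiguous grad buffers (assumed `_grad_buffers`:
            # a dict mapping dtype -> buffer object with a flat `.data` tensor).
            self.grad_buffers = [
                buf for m in modules
                if getattr(m, '_grad_buffers', None) is not None
                for buf in m._grad_buffers.values()
            ]

        def zero_grad_buffers(self):
            # Zero the flat buffers instead of looping over per-parameter .grad tensors.
            for buf in self.grad_buffers:
                buf.data.zero_()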
megatron/optimizer/optimizer.py

(This diff is collapsed in the page view and is not shown here; it accounts for +334 -183 of the change.)
megatron/training.py

@@ -365,8 +365,12 @@ def setup_model_and_optimizer(model_provider_func,
     unwrapped_model = unwrap_model(model,
                                    (torchDDP, LocalDDP, Float16Module))
-    optimizer = get_megatron_optimizer(unwrapped_model, no_wd_decay_cond,
+    # >>>
+    # optimizer = get_megatron_optimizer(unwrapped_model, no_wd_decay_cond,
+    #                                    scale_lr_cond, lr_mult)
+    optimizer = get_megatron_optimizer(model, no_wd_decay_cond,
                                        scale_lr_cond, lr_mult)
+    # <<<
     opt_param_scheduler = get_optimizer_param_scheduler(optimizer)
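Note the call-site change: setup_model_and_optimizer now passes the wrapped model (the LocalDDP modules) to get_megatron_optimizer rather than unwrapped_model. Presumably this is because the contiguous gradient buffers live on the DDP wrapper rather than on the bare module, so a grad-buffer-centric optimizer needs the wrapper to find them. The previous call is left commented out inside the # >>> / # <<< markers, consistent with the work-in-progress nature of the commit.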