Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
OpenDAS
Megatron-LM
Commits
48269d8d
Commit
48269d8d
authored
Oct 13, 2020
by
Mostofa Patwary
Browse files
added adam betas and eps as arguments
parent
42d21122
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
11 additions
and
1 deletion
+11
-1
megatron/arguments.py
megatron/arguments.py
+9
-0
megatron/training.py
megatron/training.py
+2
-1
No files found.
megatron/arguments.py
View file @
48269d8d
...
@@ -199,6 +199,15 @@ def _add_regularization_args(parser):
...
@@ -199,6 +199,15 @@ def _add_regularization_args(parser):
help
=
'Weight decay coefficient for L2 regularization.'
)
help
=
'Weight decay coefficient for L2 regularization.'
)
group
.
add_argument
(
'--clip-grad'
,
type
=
float
,
default
=
1.0
,
group
.
add_argument
(
'--clip-grad'
,
type
=
float
,
default
=
1.0
,
help
=
'Gradient clipping based on global L2 norm.'
)
help
=
'Gradient clipping based on global L2 norm.'
)
group
.
add_argument
(
'--adam-beta1'
,
type
=
float
,
default
=
0.9
,
help
=
'First coefficient for computing running averages of '
'gradient and its square'
)
group
.
add_argument
(
'--adam-beta2'
,
type
=
float
,
default
=
0.999
,
help
=
'Second coefficient for computing running averages of '
'gradient and its square'
)
group
.
add_argument
(
'--adam-eps'
,
type
=
float
,
default
=
1e-08
,
help
=
'term added to the denominator to improve '
'numerical stability'
)
return
parser
return
parser
...
...
megatron/training.py
View file @
48269d8d
...
@@ -164,7 +164,8 @@ def get_optimizer(model):
...
@@ -164,7 +164,8 @@ def get_optimizer(model):
param
.
model_parallel
=
False
param
.
model_parallel
=
False
# Use Adam.
# Use Adam.
optimizer
=
Adam
(
param_groups
,
lr
=
args
.
lr
,
weight_decay
=
args
.
weight_decay
)
optimizer
=
Adam
(
param_groups
,
lr
=
args
.
lr
,
weight_decay
=
args
.
weight_decay
,
betas
=
(
args
.
adam_beta1
,
args
.
adam_beta2
),
eps
=
args
.
adam_eps
)
# Wrap into fp16 optimizer.
# Wrap into fp16 optimizer.
if
args
.
fp16
:
if
args
.
fp16
:
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment