OpenDAS / apex / Commits / 43522e63

Commit 43522e63, authored Feb 01, 2019 by Michael Carilli
Parent: aed3086a

    Making static loss scale the default, and clipping master grads when running with --fp16
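In these examples, --fp16 training keeps fp16 model weights for the forward/backward pass and a separate fp32 "master" copy for the weight update; the loss is multiplied by a static scale before backward so small fp16 gradients don't underflow, and the scale is removed from the gradients before they are used. The sketch below shows that overall pattern with plain PyTorch tensor ops rather than the apex.fp16_utils helpers the examples call; the model, learning rate, and clip value are placeholders, and a CUDA device is assumed.

import torch
import torch.nn as nn

# Toy stand-ins; the real examples train an ImageNet CNN or an RNN language model.
model = nn.Linear(16, 4).cuda().half()                     # fp16 model weights
master_params = [p.detach().clone().float().requires_grad_() for p in model.parameters()]

loss_scale = 128.0                                         # the new default in this commit
lr, clip = 0.1, 0.25

x = torch.randn(8, 16, device='cuda', dtype=torch.float16)
target = torch.randint(0, 4, (8,), device='cuda')

loss = nn.functional.cross_entropy(model(x).float(), target)
(loss * loss_scale).backward()                             # scaled backward pass; grads live on the fp16 params

for p, mp in zip(model.parameters(), master_params):
    mp.grad = p.grad.detach().float() / loss_scale         # model grads -> fp32 master grads, unscaled

torch.nn.utils.clip_grad_norm_(master_params, clip)        # clip the master grads (this commit's change)

with torch.no_grad():
    for p, mp in zip(model.parameters(), master_params):
        mp -= lr * mp.grad                                 # fp32 SGD update on the master weights
        p.copy_(mp)                                        # refresh the fp16 model weights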
Showing 3 changed files with 19 additions and 11 deletions:

    examples/imagenet/main.py                             +3  -1
    examples/word_language_model/main.py                  +15 -9
    examples/word_language_model/main_fp16_optimizer.py   +1  -1
examples/imagenet/main.py

@@ -119,7 +119,9 @@ def main():
     if args.static_loss_scale != 1.0:
         if not args.fp16:
-            print("Warning: if --fp16 is not used, static_loss_scale will be ignored.")
+            print("Warning: static_loss_scale != 1.0 is only necessary with --fp16. "
+                  "Resetting static_loss_scale to 1.0")
+            args.static_loss_scale = 1.0
 
     # create model
     if args.pretrained:
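This guard keeps a leftover --static-loss-scale value from distorting a pure fp32 run. A stripped-down, hypothetical illustration of the same check, reduced to the two flags involved and with the arguments passed explicitly so it runs as a standalone snippet:

import argparse

parser = argparse.ArgumentParser()
parser.add_argument('--fp16', action='store_true')
parser.add_argument('--static-loss-scale', type=float, default=1.0)
args = parser.parse_args(['--static-loss-scale', '128.0'])   # note: no --fp16

if args.static_loss_scale != 1.0:
    if not args.fp16:
        print("Warning: static_loss_scale != 1.0 is only necessary with --fp16. "
              "Resetting static_loss_scale to 1.0")
        args.static_loss_scale = 1.0

print(args.static_loss_scale)   # 1.0 -- the scale is neutral for fp32 training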
examples/word_language_model/main.py

@@ -47,7 +47,7 @@ parser.add_argument('--save', type=str, default='model.pt',
                     help='path to save the final model')
 parser.add_argument('--fp16', action='store_true',
                     help='Run model in pseudo-fp16 mode (fp16 storage fp32 math).')
-parser.add_argument('--static-loss-scale', type=float, default=1,
+parser.add_argument('--static-loss-scale', type=float, default=128.0,
                     help='Static loss scale, positive power of 2 values can improve fp16 convergence.')
 args = parser.parse_args()
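The new default of 128 is a power of two, which is what the help string hints at: scaling by 2^7 only shifts the fp16 exponent, so in-range gradients are rescaled exactly and the later division restores them bit for bit, while very small gradients that would otherwise flush to zero stay representable. A small numeric check of that claim, with values chosen only for illustration:

import numpy as np

g = np.float16(3e-6)                  # a tiny but representable fp16 gradient
scale = np.float16(128.0)             # power of two: scaling only shifts the exponent

print((g * scale) / scale == g)       # True: the round trip is exact for in-range values
print(np.float16(1e-8))               # 0.0 -- below fp16's range, lost outright
print(np.float16(1e-8) * scale)       # still 0.0: once lost, scaling can't recover it
print(np.float16(1e-8 * 128.0))       # nonzero: scaled before the fp16 cast, the value survives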
@@ -118,6 +118,12 @@ if args.cuda and args.fp16:
     model_params, master_params = prep_param_lists(model)
 elif args.cuda:
     model.cuda()
 
+if (not args.fp16) or (not args.cuda):
+    print("Warning: static_loss_scale != 1.0 is only necessary with --fp16. "
+          "Resetting static_loss_scale to 1.0")
+    args.static_loss_scale = 1.0
+
 criterion = nn.CrossEntropyLoss()
 
 ###############################################################################
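For context on the names in this hunk: prep_param_lists (from apex.fp16_utils) pairs the fp16 model parameters with a freshly allocated fp32 "master" copy, and model_grads_to_master_grads / master_params_to_model_params move gradients and weights between the two sets during training. A rough setup sketch, assuming a CUDA device and a placeholder model in place of the example's RNN:

import torch.nn as nn
from apex.fp16_utils import prep_param_lists

# Placeholder model standing in for the word-language-model RNN.
model = nn.Sequential(nn.Embedding(1000, 64), nn.Linear(64, 1000)).cuda().half()

# fp16 "model" params (forward/backward) and fp32 "master" params (weight update),
# as maintained by the example script when --cuda and --fp16 are both set.
model_params, master_params = prep_param_lists(model)

print(model_params[0].dtype)    # expected: torch.float16
print(master_params[0].dtype)   # expected: torch.float32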
@@ -184,21 +190,21 @@ def train():
         loss = criterion(output.view(-1, ntokens), targets)
         loss = loss * args.static_loss_scale
         loss.backward()
-        loss = loss / args.static_loss_scale
+        loss.data = loss.data / args.static_loss_scale
 
-        # `clip_grad_norm` helps prevent the exploding gradient problem in RNNs / LSTMs.
-        # apex.fp16_utils.clip_grad_norm selects between "torch.nn.utils.clip_grad_norm"
-        # and "torch.nn.utils.clip_grad_norm_" based on Pytorch version.
-        # It's not FP16-specific, just a small fix to avoid deprecation warnings.
-        clip_grad_norm(model.parameters(), args.clip)
         if args.fp16 and args.cuda:
             model_grads_to_master_grads(model_params, master_params)
+            if args.static_loss_scale != 1:
+                for param in master_params:
+                    param.grad.data = param.grad.data / args.static_loss_scale
+            clip_grad_norm(master_params, args.clip)
             for param in master_params:
-                param.data = param.data - param.grad.data * (lr/args.static_loss_scale)
+                param.data = param.data - param.grad.data * lr
             master_params_to_model_params(model_params, master_params)
         else:
+            clip_grad_norm(model.parameters(), args.clip)
             for p in model.parameters():
-                p.data.add_(-lr/args.static_loss_scale, p.grad.data)
+                p.data.add_(-lr, p.grad.data)
 
         total_loss += loss.data
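The ordering inside the new fp16 branch is the substantive change: gradients are copied to the fp32 master params, divided by the loss scale, and only then clipped, so args.clip is compared against the true gradient norm rather than one inflated by a factor of 128. A small standalone illustration of why clipping before unscaling would go wrong (values are arbitrary):

import torch

clip, loss_scale = 0.25, 128.0

# A parameter whose true (unscaled) gradient has L2 norm 0.2 -- already under `clip`.
p = torch.nn.Parameter(torch.zeros(100))
true_grad = torch.full((100,), 0.02)            # norm = sqrt(100) * 0.02 = 0.2

# Wrong order: clip while the gradient is still scaled, then unscale.
p.grad = true_grad * loss_scale
torch.nn.utils.clip_grad_norm_([p], clip)
print((p.grad / loss_scale).norm())             # ~0.002: a well-behaved gradient gets crushed

# Order used in this commit: unscale first, then clip.
p.grad = (true_grad * loss_scale) / loss_scale
torch.nn.utils.clip_grad_norm_([p], clip)
print(p.grad.norm())                            # 0.2: left untouched, as it should be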
examples/word_language_model/main_fp16_optimizer.py

@@ -47,7 +47,7 @@ parser.add_argument('--save', type=str, default='model.pt',
                     help='path to save the final model')
 parser.add_argument('--fp16', action='store_true',
                     help='Run model in pseudo-fp16 mode (fp16 storage fp32 math).')
-parser.add_argument('--static-loss-scale', type=float, default=1,
+parser.add_argument('--static-loss-scale', type=float, default=128.0,
                     help='Static loss scale, positive power of 2 values can improve fp16 convergence.')
 parser.add_argument('--dynamic-loss-scale', action='store_true',
                     help='Use dynamic loss scaling. If supplied, this argument supersedes ' +
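main_fp16_optimizer.py drives the same flags through apex.fp16_utils.FP16_Optimizer, which keeps the fp32 master copies and applies the static or dynamic loss scale internally. Roughly, its training step looks like the sketch below; the model and hyperparameters are placeholders, and the method names reflect the FP16_Optimizer API of that era, so treat them as an assumption rather than a quote from this file.

import torch
import torch.nn as nn
from apex.fp16_utils import FP16_Optimizer

model = nn.Linear(32, 8).cuda().half()                      # placeholder fp16 model
optimizer = torch.optim.SGD(model.parameters(), lr=20.0)

# Static scaling of 128.0 by default after this commit; --dynamic-loss-scale would
# instead correspond to FP16_Optimizer(optimizer, dynamic_loss_scale=True).
optimizer = FP16_Optimizer(optimizer, static_loss_scale=128.0)

x = torch.randn(4, 32, device='cuda', dtype=torch.float16)
target = torch.randint(0, 8, (4,), device='cuda')
loss = nn.functional.cross_entropy(model(x).float(), target)

optimizer.zero_grad()
optimizer.backward(loss)            # scales the loss, backprops, updates the fp32 master grads
optimizer.clip_master_grads(0.25)   # clip on the master grads, mirroring the change in main.py
optimizer.step()                    # fp32 update; fp16 model weights are refreshed from the masters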