Unverified commit 51e980ce, authored Aug 30, 2019 by Thomas Wolf, committed via GitHub on Aug 30, 2019

Merge pull request #1155 from anhnt170489/apex_fp16

Update apex fp16 implementation

Parents: 206c35e9, 2fb9a934
Showing 1 changed file with 31 additions and 16 deletions.

examples/lm_finetuning/finetune_on_pregenerated.py (+31, -16)
...
@@ -235,8 +235,9 @@ def main():
     # Prepare model
     model = BertForPreTraining.from_pretrained(args.bert_model)
-    if args.fp16:
-        model.half()
+    # We no longer need to call model.half() manually, as Apex recommends
+    # if args.fp16:
+    #     model.half()
     model.to(device)
     if args.local_rank != -1:
         try:
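The rationale for dropping the manual model.half() call: under amp's O1 opt level the model's weights stay in FP32 and Apex patches the relevant functions to cast on the fly, so casting the whole model yourself is unnecessary and can conflict with amp's own handling. A minimal sketch of the difference, not taken from this diff (the Linear layer stands in for BertForPreTraining; a CUDA device and an Apex install are assumed):

import torch
from apex import amp  # requires NVIDIA Apex

model = torch.nn.Linear(10, 10).cuda()          # placeholder for BertForPreTraining
optimizer = torch.optim.Adam(model.parameters(), lr=3e-5)

# Old path: cast the weights yourself before building the fp16 optimizer wrapper.
# model.half()

# New path: keep the model in FP32 and let amp insert the casts.
model, optimizer = amp.initialize(model, optimizer, opt_level="O1")
print(next(model.parameters()).dtype)           # torch.float32 -- master weights stay FP32 under O1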
...
@@ -257,25 +258,36 @@ def main():
         {'params': [p for n, p in param_optimizer if any(nd in n for nd in no_decay)], 'weight_decay': 0.0}
     ]
+    optimizer = AdamW(optimizer_grouped_parameters, lr=args.learning_rate, eps=args.adam_epsilon)
+    scheduler = WarmupLinearSchedule(optimizer, warmup_steps=args.warmup_steps, t_total=num_train_optimization_steps)
     if args.fp16:
         try:
-            from apex.optimizers import FP16_Optimizer
-            from apex.optimizers import FusedAdam
+            # from apex.optimizers import FP16_Optimizer
+            # from apex.optimizers import FusedAdam
+            from apex import amp
         except ImportError:
             raise ImportError("Please install apex from https://www.github.com/nvidia/apex to use distributed and fp16 training.")
-        optimizer = FusedAdam(optimizer_grouped_parameters,
-                              lr=args.learning_rate,
-                              bias_correction=False,
-                              max_grad_norm=1.0)
-        if args.loss_scale == 0:
-            optimizer = FP16_Optimizer(optimizer, dynamic_loss_scale=True)
-        else:
-            optimizer = FP16_Optimizer(optimizer, static_loss_scale=args.loss_scale)
-    else:
-        optimizer = AdamW(optimizer_grouped_parameters, lr=args.learning_rate, eps=args.adam_epsilon)
-        scheduler = WarmupLinearSchedule(optimizer, warmup_steps=args.warmup_steps, t_total=num_train_optimization_steps)
+        # The line below is the main upgrade of the Apex fp16 implementation. opt_level="O1" is used
+        # because Apex recommends it for typical use. We could make it configurable.
+        model, optimizer = amp.initialize(model, optimizer, opt_level="O1")
+        # We no longer need to wrap FusedAdam in FP16_Optimizer either; Apex now supports any PyTorch optimizer.
+        # optimizer = FusedAdam(optimizer_grouped_parameters,
+        #                       lr=args.learning_rate,
+        #                       bias_correction=False,
+        #                       max_grad_norm=1.0)
+        # if args.loss_scale == 0:
+        #     optimizer = FP16_Optimizer(optimizer, dynamic_loss_scale=True)
+        # else:
+        #     optimizer = FP16_Optimizer(optimizer, static_loss_scale=args.loss_scale)
+    # else:
+    #     optimizer = AdamW(optimizer_grouped_parameters, lr=args.learning_rate, eps=args.adam_epsilon)
+    #     scheduler = WarmupLinearSchedule(optimizer, warmup_steps=args.warmup_steps, t_total=num_train_optimization_steps)
     global_step = 0
     logging.info("***** Running training *****")
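The added comment notes that the opt_level could be made configurable. A hedged sketch of what that follow-up might look like, using a hypothetical --fp16_opt_level flag that is not part of this commit (the Linear layer again stands in for the real model):

import argparse

import torch
from apex import amp  # requires NVIDIA Apex

parser = argparse.ArgumentParser()
parser.add_argument("--fp16", action="store_true")
# Hypothetical flag (not in this diff): expose the amp optimization level on the CLI.
parser.add_argument("--fp16_opt_level", type=str, default="O1",
                    choices=["O0", "O1", "O2", "O3"],
                    help="Apex AMP opt level; O1 is the recommended default for typical use.")
args = parser.parse_args()

model = torch.nn.Linear(10, 10).cuda()          # stands in for BertForPreTraining
optimizer = torch.optim.Adam(model.parameters(), lr=3e-5)

if args.fp16:
    model, optimizer = amp.initialize(model, optimizer, opt_level=args.fp16_opt_level)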
...
@@ -304,7 +316,10 @@ def main():
                 if args.gradient_accumulation_steps > 1:
                     loss = loss / args.gradient_accumulation_steps
                 if args.fp16:
-                    optimizer.backward(loss)
+                    # FP16_Optimizer's backward() is deprecated here and replaced as the Apex docs recommend
+                    # optimizer.backward(loss)
+                    with amp.scale_loss(loss, optimizer) as scaled_loss:
+                        scaled_loss.backward()
                 else:
                     loss.backward()
                 tr_loss += loss.item()
...
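Putting the pieces together, here is a hedged sketch of the backward/step logic this hunk converges on: loss scaling via amp.scale_loss under gradient accumulation, with gradient clipping through amp.master_params added as a common companion pattern. The helper function and the clipping line are illustrative, not part of this diff:

import torch
from apex import amp  # requires NVIDIA Apex

def backward_and_maybe_step(loss, model, optimizer, step, args):
    """Illustrative helper mirroring the fp16 branch of the training loop; not the file's exact code."""
    if args.gradient_accumulation_steps > 1:
        loss = loss / args.gradient_accumulation_steps

    if args.fp16:
        # amp scales the loss so small fp16 gradients do not underflow, then unscales
        # them before the optimizer consumes them.
        with amp.scale_loss(loss, optimizer) as scaled_loss:
            scaled_loss.backward()
    else:
        loss.backward()

    if (step + 1) % args.gradient_accumulation_steps == 0:
        # Clip the fp32 master gradients when running under amp (illustrative, not in this diff).
        params = amp.master_params(optimizer) if args.fp16 else model.parameters()
        torch.nn.utils.clip_grad_norm_(params, 1.0)
        optimizer.step()
        optimizer.zero_grad()

    return loss.item()

Compared with the old FP16_Optimizer path, no explicit loss-scale bookkeeping is needed here: amp applies dynamic loss scaling by default, which is why the args.loss_scale branches above could be commented out.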