chenpangpang / transformers
"...git@developer.sourcefind.cn:chenpangpang/transformers.git" did not exist on "186c0775132fecdc571f3996f75c7e1377d5fb9b"
Commit d94c6b01, authored Apr 23, 2019 by thomwolf
fix training schedules in examples to match new API
Parent commit: c36cca07
Showing 5 changed files, with 25 additions and 12 deletions:
examples/lm_finetuning/finetune_on_pregenerated.py  +5 -4
examples/lm_finetuning/simple_lm_finetuning.py      +5 -2
examples/run_classifier.py                          +5 -2
examples/run_squad.py                               +5 -2
examples/run_swag.py                                +5 -2
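All five files make the same change: the module-level warmup_linear function from pytorch_pretrained_bert.optimization is replaced by the class-based WarmupLinearSchedule, which is constructed once with the warmup proportion and the total number of optimization steps and is then queried on every step. The sketch below mirrors only the calls that appear in the hunks that follow; the concrete numbers are hypothetical stand-ins for values the example scripts derive from their command-line arguments.

from argparse import Namespace

from pytorch_pretrained_bert.optimization import WarmupLinearSchedule

# Hypothetical stand-ins for values the example scripts compute from their CLI args.
args = Namespace(learning_rate=3e-5, warmup_proportion=0.1)
num_train_optimization_steps = 1000   # total optimizer updates over the whole run
global_step = 250                     # current optimizer update inside the training loop

# Built once, right after the optimizer (this commit places it in the fp16 branch).
warmup_linear = WarmupLinearSchedule(warmup=args.warmup_proportion,
                                     t_total=num_train_optimization_steps)

# Queried each step, exactly as the updated examples call it.
lr_this_step = args.learning_rate * warmup_linear.get_lr(global_step / num_train_optimization_steps,
                                                         args.warmup_proportion)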
examples/lm_finetuning/finetune_on_pregenerated.py
@@ -14,7 +14,7 @@ from tqdm import tqdm
 from pytorch_pretrained_bert.modeling import BertForPreTraining
 from pytorch_pretrained_bert.tokenization import BertTokenizer
-from pytorch_pretrained_bert.optimization import BertAdam, warmup_linear
+from pytorch_pretrained_bert.optimization import BertAdam, WarmupLinearSchedule
 InputFeatures = namedtuple("InputFeatures", "input_ids input_mask segment_ids lm_label_ids is_next")
@@ -268,7 +268,8 @@ def main():
             optimizer = FP16_Optimizer(optimizer, dynamic_loss_scale=True)
         else:
             optimizer = FP16_Optimizer(optimizer, static_loss_scale=args.loss_scale)
+        warmup_linear = WarmupLinearSchedule(warmup=args.warmup_proportion, t_total=num_train_optimization_steps)
     else:
         optimizer = BertAdam(optimizer_grouped_parameters,
                              lr=args.learning_rate,
@@ -314,7 +315,7 @@ def main():
                 if args.fp16:
                     # modify learning rate with special warm up BERT uses
                     # if args.fp16 is False, BertAdam is used that handles this automatically
-                    lr_this_step = args.learning_rate * warmup_linear(global_step/num_train_optimization_steps,
-                                                                      args.warmup_proportion)
+                    lr_this_step = args.learning_rate * warmup_linear.get_lr(global_step/num_train_optimization_steps,
+                                                                             args.warmup_proportion)
                     for param_group in optimizer.param_groups:
                         param_group['lr'] = lr_this_step
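The name WarmupLinearSchedule and its warmup / t_total arguments describe a learning-rate multiplier that ramps up linearly over the first warmup fraction of training and then decays linearly toward zero, which is why the examples multiply args.learning_rate by it each step. As a rough illustration only: this is an independent re-implementation of that general shape, not the library's code, and the library's exact formula may differ.

def linear_warmup_then_decay(progress, warmup=0.1):
    """Schematic LR multiplier for progress in [0, 1]: ramp up, then decay."""
    if progress < warmup:
        return progress / warmup                          # linear ramp from 0 to 1
    return max((1.0 - progress) / (1.0 - warmup), 0.0)    # linear decay back to 0

# Multiplier at a few points of a run with 10% warmup: 0.0, 0.5, 1.0, 0.556, 0.0
for p in (0.0, 0.05, 0.1, 0.5, 1.0):
    print(p, round(linear_warmup_then_decay(p), 3))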
examples/lm_finetuning/simple_lm_finetuning.py
@@ -31,7 +31,7 @@ from tqdm import tqdm, trange
 from pytorch_pretrained_bert.modeling import BertForPreTraining
 from pytorch_pretrained_bert.tokenization import BertTokenizer
-from pytorch_pretrained_bert.optimization import BertAdam, warmup_linear
+from pytorch_pretrained_bert.optimization import BertAdam, WarmupLinearSchedule
 logging.basicConfig(format='%(asctime)s - %(levelname)s - %(name)s - %(message)s',
                     datefmt='%m/%d/%Y %H:%M:%S',
@@ -556,6 +556,8 @@ def main():
             optimizer = FP16_Optimizer(optimizer, dynamic_loss_scale=True)
         else:
             optimizer = FP16_Optimizer(optimizer, static_loss_scale=args.loss_scale)
+        warmup_linear = WarmupLinearSchedule(warmup=args.warmup_proportion,
+                                             t_total=num_train_optimization_steps)
     else:
         optimizer = BertAdam(optimizer_grouped_parameters,
@@ -601,7 +603,8 @@ def main():
                 if args.fp16:
                     # modify learning rate with special warm up BERT uses
                     # if args.fp16 is False, BertAdam is used that handles this automatically
-                    lr_this_step = args.learning_rate * warmup_linear(global_step/num_train_optimization_steps, args.warmup_proportion)
+                    lr_this_step = args.learning_rate * warmup_linear.get_lr(global_step/num_train_optimization_steps,
+                                                                             args.warmup_proportion)
                     for param_group in optimizer.param_groups:
                         param_group['lr'] = lr_this_step
                     optimizer.step()
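For context on why the updated call sits inside the if args.fp16 branch: in the non-fp16 path BertAdam applies the warmup schedule internally (as the in-code comment notes), whereas the FP16_Optimizer wrapper does not, so the scripts recompute the learning rate each step and write it into every parameter group before stepping. A condensed sketch of that pattern, assuming optimizer, warmup_linear, args, global_step and num_train_optimization_steps are set up as in the hunks above:

if args.fp16:
    # FP16_Optimizer does not apply BERT's warmup schedule itself,
    # so scale the learning rate by hand before each optimizer step.
    lr_this_step = args.learning_rate * warmup_linear.get_lr(global_step / num_train_optimization_steps,
                                                             args.warmup_proportion)
    for param_group in optimizer.param_groups:
        param_group['lr'] = lr_this_step
optimizer.step()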
examples/run_classifier.py
@@ -38,7 +38,7 @@ from sklearn.metrics import matthews_corrcoef, f1_score
 from pytorch_pretrained_bert.file_utils import PYTORCH_PRETRAINED_BERT_CACHE, WEIGHTS_NAME, CONFIG_NAME
 from pytorch_pretrained_bert.modeling import BertForSequenceClassification, BertConfig
 from pytorch_pretrained_bert.tokenization import BertTokenizer
-from pytorch_pretrained_bert.optimization import BertAdam, warmup_linear
+from pytorch_pretrained_bert.optimization import BertAdam, WarmupLinearSchedule
 logger = logging.getLogger(__name__)
@@ -784,6 +784,8 @@ def main():
             optimizer = FP16_Optimizer(optimizer, dynamic_loss_scale=True)
         else:
             optimizer = FP16_Optimizer(optimizer, static_loss_scale=args.loss_scale)
+        warmup_linear = WarmupLinearSchedule(warmup=args.warmup_proportion,
+                                             t_total=num_train_optimization_steps)
     else:
         optimizer = BertAdam(optimizer_grouped_parameters,
@@ -852,7 +854,8 @@ def main():
                 if args.fp16:
                     # modify learning rate with special warm up BERT uses
                     # if args.fp16 is False, BertAdam is used that handles this automatically
-                    lr_this_step = args.learning_rate * warmup_linear(global_step/num_train_optimization_steps, args.warmup_proportion)
+                    lr_this_step = args.learning_rate * warmup_linear.get_lr(global_step/num_train_optimization_steps,
+                                                                             args.warmup_proportion)
                     for param_group in optimizer.param_groups:
                         param_group['lr'] = lr_this_step
                     optimizer.step()
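t_total (num_train_optimization_steps in these scripts) is also the denominator of the global_step / num_train_optimization_steps progress value passed to get_lr, so it has to equal the total number of optimizer updates over the whole run. A hedged sketch of how such a count is typically derived in these fine-tuning examples, with gradient accumulation taken into account; the exact expression and rounding in each script may differ.

import math

# Hypothetical training configuration.
num_train_examples = 100_000
train_batch_size = 32
gradient_accumulation_steps = 2
num_train_epochs = 3

# One optimizer update happens every `gradient_accumulation_steps` batches,
# so the schedule's t_total is roughly:
updates_per_epoch = math.ceil(num_train_examples / train_batch_size / gradient_accumulation_steps)
num_train_optimization_steps = updates_per_epoch * num_train_epochs
print(num_train_optimization_steps)  # 4689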
examples/run_squad.py
@@ -36,7 +36,7 @@ from tqdm import tqdm, trange
 from pytorch_pretrained_bert.file_utils import PYTORCH_PRETRAINED_BERT_CACHE, WEIGHTS_NAME, CONFIG_NAME
 from pytorch_pretrained_bert.modeling import BertForQuestionAnswering, BertConfig
-from pytorch_pretrained_bert.optimization import BertAdam, warmup_linear
+from pytorch_pretrained_bert.optimization import BertAdam, WarmupLinearSchedule
 from pytorch_pretrained_bert.tokenization import (BasicTokenizer,
                                                   BertTokenizer,
                                                   whitespace_tokenize)
@@ -949,6 +949,8 @@ def main():
             optimizer = FP16_Optimizer(optimizer, dynamic_loss_scale=True)
         else:
             optimizer = FP16_Optimizer(optimizer, static_loss_scale=args.loss_scale)
+        warmup_linear = WarmupLinearSchedule(warmup=args.warmup_proportion,
+                                             t_total=num_train_optimization_steps)
     else:
         optimizer = BertAdam(optimizer_grouped_parameters,
                              lr=args.learning_rate,
@@ -1013,7 +1015,8 @@ def main():
                 if args.fp16:
                     # modify learning rate with special warm up BERT uses
                     # if args.fp16 is False, BertAdam is used and handles this automatically
-                    lr_this_step = args.learning_rate * warmup_linear(global_step/num_train_optimization_steps, args.warmup_proportion)
+                    lr_this_step = args.learning_rate * warmup_linear.get_lr(global_step/num_train_optimization_steps,
+                                                                             args.warmup_proportion)
                     for param_group in optimizer.param_groups:
                         param_group['lr'] = lr_this_step
                     optimizer.step()
examples/run_swag.py
@@ -34,7 +34,7 @@ from tqdm import tqdm, trange
 from pytorch_pretrained_bert.file_utils import PYTORCH_PRETRAINED_BERT_CACHE, WEIGHTS_NAME, CONFIG_NAME
 from pytorch_pretrained_bert.modeling import BertForMultipleChoice, BertConfig
-from pytorch_pretrained_bert.optimization import BertAdam, warmup_linear
+from pytorch_pretrained_bert.optimization import BertAdam, WarmupLinearSchedule
 from pytorch_pretrained_bert.tokenization import BertTokenizer
 logging.basicConfig(format='%(asctime)s - %(levelname)s - %(name)s - %(message)s',
@@ -411,6 +411,8 @@ def main():
             optimizer = FP16_Optimizer(optimizer, dynamic_loss_scale=True)
         else:
             optimizer = FP16_Optimizer(optimizer, static_loss_scale=args.loss_scale)
+        warmup_linear = WarmupLinearSchedule(warmup=args.warmup_proportion,
+                                             t_total=num_train_optimization_steps)
     else:
         optimizer = BertAdam(optimizer_grouped_parameters,
                              lr=args.learning_rate,
@@ -464,7 +466,8 @@ def main():
                 if args.fp16:
                     # modify learning rate with special warm up BERT uses
                     # if args.fp16 is False, BertAdam is used that handles this automatically
-                    lr_this_step = args.learning_rate * warmup_linear(global_step/num_train_optimization_steps, args.warmup_proportion)
+                    lr_this_step = args.learning_rate * warmup_linear.get_lr(global_step/num_train_optimization_steps,
+                                                                             args.warmup_proportion)
                     for param_group in optimizer.param_groups:
                         param_group['lr'] = lr_this_step
                     optimizer.step()