chenpangpang / transformers
Commit 1579c536 authored Feb 05, 2019 by thomwolf

more explicit notation: num_train_step => num_train_optimization_steps

parent 51690699
Showing 6 changed files with 44 additions and 46 deletions
.gitignore                      +4  -1
examples/run_classifier.py      +8  -9
examples/run_lm_finetuning.py   +8  -9
examples/run_squad.py           +8  -9
examples/run_squad2.py          +8  -9
examples/run_swag.py            +8  -9
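All six scripts compute the same quantity under the new name: the total number of optimizer updates the run will perform, which then drives the warmup schedule. Below is a minimal sketch of that computation, with placeholder values standing in for the scripts' real argparse arguments.

dataset_size = 10000            # placeholder for len(train_examples) or len(train_dataset)
train_batch_size = 32
gradient_accumulation_steps = 2
num_train_epochs = 3
world_size = 1                  # placeholder for torch.distributed.get_world_size()

num_train_optimization_steps = int(
    dataset_size / train_batch_size / gradient_accumulation_steps) * num_train_epochs
if world_size > 1:              # the scripts gate this on args.local_rank != -1
    num_train_optimization_steps //= world_size

print(num_train_optimization_steps)   # 468 with the values above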
.gitignore

@@ -120,3 +120,6 @@ dmypy.json
 # TF code
 tensorflow_code
+
+# Models
+models
\ No newline at end of file
examples/run_classifier.py

@@ -438,11 +438,13 @@ def main():
     tokenizer = BertTokenizer.from_pretrained(args.bert_model, do_lower_case=args.do_lower_case)
     train_examples = None
-    num_train_steps = None
+    num_train_optimization_steps = None
     if args.do_train:
         train_examples = processor.get_train_examples(args.data_dir)
-        num_train_steps = len(train_examples) // args.train_batch_size // args.gradient_accumulation_steps * args.num_train_epochs
+        num_train_optimization_steps = int(len(train_examples) / args.train_batch_size / args.gradient_accumulation_steps) * args.num_train_epochs
+        if args.local_rank != -1:
+            num_train_optimization_steps = num_train_optimization_steps // torch.distributed.get_world_size()
     # Prepare model
     model = BertForSequenceClassification.from_pretrained(args.bert_model,
@@ -468,9 +470,6 @@ def main():
         {'params': [p for n, p in param_optimizer if not any(nd in n for nd in no_decay)], 'weight_decay': 0.01},
         {'params': [p for n, p in param_optimizer if any(nd in n for nd in no_decay)], 'weight_decay': 0.0}
         ]
-    t_total = num_train_steps
-    if args.local_rank != -1:
-        t_total = t_total // torch.distributed.get_world_size()
     if args.fp16:
         try:
             from apex.optimizers import FP16_Optimizer
@@ -491,7 +490,7 @@ def main():
         optimizer = BertAdam(optimizer_grouped_parameters,
                              lr=args.learning_rate,
                              warmup=args.warmup_proportion,
-                             t_total=t_total)
+                             t_total=num_train_optimization_steps)
     global_step = 0
     nb_tr_steps = 0
@@ -502,7 +501,7 @@ def main():
         logger.info("***** Running training *****")
         logger.info("  Num examples = %d", len(train_examples))
         logger.info("  Batch size = %d", args.train_batch_size)
-        logger.info("  Num steps = %d", num_train_steps)
+        logger.info("  Num steps = %d", num_train_optimization_steps)
         all_input_ids = torch.tensor([f.input_ids for f in train_features], dtype=torch.long)
         all_input_mask = torch.tensor([f.input_mask for f in train_features], dtype=torch.long)
         all_segment_ids = torch.tensor([f.segment_ids for f in train_features], dtype=torch.long)
@@ -539,7 +538,7 @@ def main():
                 if args.fp16:
                     # modify learning rate with special warm up BERT uses
                     # if args.fp16 is False, BertAdam is used that handles this automatically
-                    lr_this_step = args.learning_rate * warmup_linear(global_step/t_total, args.warmup_proportion)
+                    lr_this_step = args.learning_rate * warmup_linear(global_step/num_train_optimization_steps, args.warmup_proportion)
                     for param_group in optimizer.param_groups:
                         param_group['lr'] = lr_this_step
                 optimizer.step()
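For context, the renamed total is what these scripts pass to BertAdam as t_total, together with warmup, so the optimizer knows how many of the run's updates fall inside the warmup phase. A hedged sketch follows; it assumes the pytorch_pretrained_bert package these examples import, and only the keyword names visible in the hunk above are taken from the actual code.

# Sketch only: mirrors the BertAdam call changed in the hunk above.
from pytorch_pretrained_bert.optimization import BertAdam

def build_optimizer(grouped_parameters, learning_rate, warmup_proportion,
                    num_train_optimization_steps):
    # t_total is the full number of optimizer updates for the run; the first
    # warmup_proportion * t_total of them ramp the learning rate up.
    return BertAdam(grouped_parameters,
                    lr=learning_rate,
                    warmup=warmup_proportion,
                    t_total=num_train_optimization_steps)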
examples/run_lm_finetuning.py

@@ -515,13 +515,15 @@ def main():
     tokenizer = BertTokenizer.from_pretrained(args.bert_model, do_lower_case=args.do_lower_case)
     #train_examples = None
-    num_train_steps = None
+    num_train_optimization_steps = None
    if args.do_train:
         print("Loading Train Dataset", args.train_file)
         train_dataset = BERTDataset(args.train_file, tokenizer, seq_len=args.max_seq_length, corpus_lines=None, on_memory=args.on_memory)
-        num_train_steps = len(train_dataset) // args.train_batch_size // args.gradient_accumulation_steps * args.num_train_epochs
+        num_train_optimization_steps = int(len(train_dataset) / args.train_batch_size / args.gradient_accumulation_steps) * args.num_train_epochs
+        if args.local_rank != -1:
+            num_train_optimization_steps = num_train_optimization_steps // torch.distributed.get_world_size()
     # Prepare model
     model = BertForPreTraining.from_pretrained(args.bert_model)
@@ -545,9 +547,6 @@ def main():
         {'params': [p for n, p in param_optimizer if any(nd in n for nd in no_decay)], 'weight_decay': 0.0}
         ]
-    t_total = num_train_steps
-    if args.local_rank != -1:
-        t_total = t_total // torch.distributed.get_world_size()
     if args.fp16:
         try:
             from apex.optimizers import FP16_Optimizer
@@ -568,14 +567,14 @@ def main():
         optimizer = BertAdam(optimizer_grouped_parameters,
                              lr=args.learning_rate,
                              warmup=args.warmup_proportion,
-                             t_total=t_total)
+                             t_total=num_train_optimization_steps)
     global_step = 0
     if args.do_train:
         logger.info("***** Running training *****")
         logger.info("  Num examples = %d", len(train_dataset))
         logger.info("  Batch size = %d", args.train_batch_size)
-        logger.info("  Num steps = %d", num_train_steps)
+        logger.info("  Num steps = %d", num_train_optimization_steps)
         if args.local_rank == -1:
             train_sampler = RandomSampler(train_dataset)
@@ -608,7 +607,7 @@ def main():
                 if args.fp16:
                     # modify learning rate with special warm up BERT uses
                     # if args.fp16 is False, BertAdam is used that handles this automatically
-                    lr_this_step = args.learning_rate * warmup_linear(global_step/t_total, args.warmup_proportion)
+                    lr_this_step = args.learning_rate * warmup_linear(global_step/num_train_optimization_steps, args.warmup_proportion)
                     for param_group in optimizer.param_groups:
                         param_group['lr'] = lr_this_step
                 optimizer.step()
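The fp16 branch touched in the last hunk bypasses BertAdam's built-in schedule, so the scripts scale the learning rate by hand with a warmup_linear helper. Below is a rough sketch of that path; the helper's body is an approximation of the one defined in these example scripts, not a verbatim copy, and the numeric values are placeholders.

def warmup_linear(x, warmup=0.002):
    # linear ramp-up to the base learning rate, then linear decay
    if x < warmup:
        return x / warmup
    return 1.0 - x

learning_rate = 5e-5                  # placeholder for args.learning_rate
warmup_proportion = 0.1               # placeholder for args.warmup_proportion
num_train_optimization_steps = 468    # placeholder total from the formula above

for global_step in range(num_train_optimization_steps):
    lr_this_step = learning_rate * warmup_linear(
        global_step / num_train_optimization_steps, warmup_proportion)
    # in the scripts: for param_group in optimizer.param_groups:
    #                     param_group['lr'] = lr_this_step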
examples/run_squad.py

@@ -784,12 +784,14 @@ def main():
     tokenizer = BertTokenizer.from_pretrained(args.bert_model, do_lower_case=args.do_lower_case)
     train_examples = None
-    num_train_steps = None
+    num_train_optimization_steps = None
     if args.do_train:
         train_examples = read_squad_examples(input_file=args.train_file, is_training=True)
-        num_train_steps = len(train_examples) // args.train_batch_size // args.gradient_accumulation_steps * args.num_train_epochs
+        num_train_optimization_steps = int(len(train_examples) / args.train_batch_size / args.gradient_accumulation_steps) * args.num_train_epochs
+        if args.local_rank != -1:
+            num_train_optimization_steps = num_train_optimization_steps // torch.distributed.get_world_size()
     # Prepare model
     model = BertForQuestionAnswering.from_pretrained(args.bert_model,
@@ -821,9 +823,6 @@ def main():
         {'params': [p for n, p in param_optimizer if any(nd in n for nd in no_decay)], 'weight_decay': 0.0}
         ]
-    t_total = num_train_steps
-    if args.local_rank != -1:
-        t_total = t_total // torch.distributed.get_world_size()
     if args.fp16:
         try:
             from apex.optimizers import FP16_Optimizer
@@ -843,7 +842,7 @@ def main():
         optimizer = BertAdam(optimizer_grouped_parameters,
                              lr=args.learning_rate,
                              warmup=args.warmup_proportion,
-                             t_total=t_total)
+                             t_total=num_train_optimization_steps)
     global_step = 0
     if args.do_train:
@@ -869,7 +868,7 @@ def main():
         logger.info("  Num orig examples = %d", len(train_examples))
         logger.info("  Num split examples = %d", len(train_features))
         logger.info("  Batch size = %d", args.train_batch_size)
-        logger.info("  Num steps = %d", num_train_steps)
+        logger.info("  Num steps = %d", num_train_optimization_steps)
         all_input_ids = torch.tensor([f.input_ids for f in train_features], dtype=torch.long)
         all_input_mask = torch.tensor([f.input_mask for f in train_features], dtype=torch.long)
         all_segment_ids = torch.tensor([f.segment_ids for f in train_features], dtype=torch.long)
@@ -903,7 +902,7 @@ def main():
                 if args.fp16:
                     # modify learning rate with special warm up BERT uses
                     # if args.fp16 is False, BertAdam is used that handles this automatically
-                    lr_this_step = args.learning_rate * warmup_linear(global_step/t_total, args.warmup_proportion)
+                    lr_this_step = args.learning_rate * warmup_linear(global_step/num_train_optimization_steps, args.warmup_proportion)
                     for param_group in optimizer.param_groups:
                         param_group['lr'] = lr_this_step
                 optimizer.step()
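The new local_rank != -1 branch divides the total by the process count: under torch.distributed each process only sees its shard of the data, so it also performs only its share of the optimizer updates. A small sketch with a placeholder world_size argument standing in for torch.distributed.get_world_size():

def per_process_optimization_steps(total_steps: int, world_size: int) -> int:
    # each distributed worker steps the optimizer total_steps // world_size times
    if world_size > 1:
        return total_steps // world_size
    return total_steps

assert per_process_optimization_steps(468, 1) == 468
assert per_process_optimization_steps(468, 4) == 117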
examples/run_squad2.py

@@ -877,12 +877,14 @@ def main():
     tokenizer = BertTokenizer.from_pretrained(args.bert_model)
     train_examples = None
-    num_train_steps = None
+    num_train_optimization_steps = None
     if args.do_train:
         train_examples = read_squad_examples(input_file=args.train_file, is_training=True)
-        num_train_steps = len(train_examples) // args.train_batch_size // args.gradient_accumulation_steps * args.num_train_epochs
+        num_train_optimization_steps = int(len(train_examples) / args.train_batch_size / args.gradient_accumulation_steps) * args.num_train_epochs
+        if args.local_rank != -1:
+            num_train_optimization_steps = num_train_optimization_steps // torch.distributed.get_world_size()
     # Prepare model
     model = BertForQuestionAnswering.from_pretrained(args.bert_model,
@@ -914,9 +916,6 @@ def main():
         {'params': [p for n, p in param_optimizer if any(nd in n for nd in no_decay)], 'weight_decay': 0.0}
         ]
-    t_total = num_train_steps
-    if args.local_rank != -1:
-        t_total = t_total // torch.distributed.get_world_size()
     if args.fp16:
         try:
             from apex.optimizers import FP16_Optimizer
@@ -936,7 +935,7 @@ def main():
         optimizer = BertAdam(optimizer_grouped_parameters,
                              lr=args.learning_rate,
                              warmup=args.warmup_proportion,
-                             t_total=t_total)
+                             t_total=num_train_optimization_steps)
     global_step = 0
     if args.do_train:
@@ -962,7 +961,7 @@ def main():
         logger.info("  Num orig examples = %d", len(train_examples))
         logger.info("  Num split examples = %d", len(train_features))
         logger.info("  Batch size = %d", args.train_batch_size)
-        logger.info("  Num steps = %d", num_train_steps)
+        logger.info("  Num steps = %d", num_train_optimization_steps)
         all_input_ids = torch.tensor([f.input_ids for f in train_features], dtype=torch.long)
         all_input_mask = torch.tensor([f.input_mask for f in train_features], dtype=torch.long)
         all_segment_ids = torch.tensor([f.segment_ids for f in train_features], dtype=torch.long)
@@ -997,7 +996,7 @@ def main():
                 if args.fp16:
                     # modify learning rate with special warm up BERT uses
                     # if args.fp16 is False, BertAdam is used that handles this automatically
-                    lr_this_step = args.learning_rate * warmup_linear(global_step/t_total, args.warmup_proportion)
+                    lr_this_step = args.learning_rate * warmup_linear(global_step/num_train_optimization_steps, args.warmup_proportion)
                     for param_group in optimizer.param_groups:
                         param_group['lr'] = lr_this_step
                 optimizer.step()
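The optimizer_grouped_parameters context that appears in every second hunk splits the model's parameters into two groups: names matching no_decay get weight_decay 0.0, everything else 0.01. A standalone sketch with a stand-in module rather than the real BERT model; the no_decay list shown is typical of these examples and may differ slightly from the exact one in the scripts.

import torch

model = torch.nn.Linear(4, 2)  # stand-in for the BERT model
param_optimizer = list(model.named_parameters())
no_decay = ['bias', 'LayerNorm.bias', 'LayerNorm.weight']
optimizer_grouped_parameters = [
    {'params': [p for n, p in param_optimizer if not any(nd in n for nd in no_decay)],
     'weight_decay': 0.01},
    {'params': [p for n, p in param_optimizer if any(nd in n for nd in no_decay)],
     'weight_decay': 0.0},
]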
examples/run_swag.py

@@ -349,11 +349,13 @@ def main():
     tokenizer = BertTokenizer.from_pretrained(args.bert_model, do_lower_case=args.do_lower_case)
     train_examples = None
-    num_train_steps = None
+    num_train_optimization_steps = None
     if args.do_train:
         train_examples = read_swag_examples(os.path.join(args.data_dir, 'train.csv'), is_training = True)
-        num_train_steps = len(train_examples) // args.train_batch_size // args.gradient_accumulation_steps * args.num_train_epochs
+        num_train_optimization_steps = int(len(train_examples) / args.train_batch_size / args.gradient_accumulation_steps) * args.num_train_epochs
+        if args.local_rank != -1:
+            num_train_optimization_steps = num_train_optimization_steps // torch.distributed.get_world_size()
     # Prepare model
     model = BertForMultipleChoice.from_pretrained(args.bert_model,
@@ -384,9 +386,6 @@ def main():
         {'params': [p for n, p in param_optimizer if not any(nd in n for nd in no_decay)], 'weight_decay': 0.01},
         {'params': [p for n, p in param_optimizer if any(nd in n for nd in no_decay)], 'weight_decay': 0.0}
         ]
-    t_total = num_train_steps
-    if args.local_rank != -1:
-        t_total = t_total // torch.distributed.get_world_size()
     if args.fp16:
         try:
             from apex.optimizers import FP16_Optimizer
@@ -406,7 +405,7 @@ def main():
         optimizer = BertAdam(optimizer_grouped_parameters,
                              lr=args.learning_rate,
                              warmup=args.warmup_proportion,
-                             t_total=t_total)
+                             t_total=num_train_optimization_steps)
     global_step = 0
     if args.do_train:
@@ -415,7 +414,7 @@ def main():
         logger.info("***** Running training *****")
         logger.info("  Num examples = %d", len(train_examples))
         logger.info("  Batch size = %d", args.train_batch_size)
-        logger.info("  Num steps = %d", num_train_steps)
+        logger.info("  Num steps = %d", num_train_optimization_steps)
         all_input_ids = torch.tensor(select_field(train_features, 'input_ids'), dtype=torch.long)
         all_input_mask = torch.tensor(select_field(train_features, 'input_mask'), dtype=torch.long)
         all_segment_ids = torch.tensor(select_field(train_features, 'segment_ids'), dtype=torch.long)
@@ -455,7 +454,7 @@ def main():
                 if args.fp16:
                     # modify learning rate with special warm up BERT uses
                     # if args.fp16 is False, BertAdam is used that handles this automatically
-                    lr_this_step = args.learning_rate * warmup_linear(global_step/t_total, args.warmup_proportion)
+                    lr_this_step = args.learning_rate * warmup_linear(global_step/num_train_optimization_steps, args.warmup_proportion)
                     for param_group in optimizer.param_groups:
                         param_group['lr'] = lr_this_step
                 optimizer.step()
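Finally, why this total (and not the raw batch count) is the right schedule length: with gradient accumulation the optimizer steps once every gradient_accumulation_steps batches, and global_step counts exactly those updates. A toy sketch with placeholder numbers, not the scripts' real training loop:

num_batches_per_epoch = 312         # placeholder for len(train_dataloader)
gradient_accumulation_steps = 2
num_train_epochs = 3

global_step = 0
for _ in range(num_train_epochs):
    for step in range(num_batches_per_epoch):
        # loss = loss / gradient_accumulation_steps; loss.backward()
        if (step + 1) % gradient_accumulation_steps == 0:
            # optimizer.step(); optimizer.zero_grad()
            global_step += 1

print(global_step)  # 468 = int(312 / 2) * 3, matching the renamed quantity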