chenpangpang / transformers / Commits

Commit d8e3bdbb
Authored Feb 05, 2019 by thomwolf

    moved up to current master

Parent: 0ad9b239
Showing 1 changed file with 20 additions and 17 deletions.

examples/run_squad.py  (+20, -17)
@@ -36,7 +36,7 @@ from torch.utils.data.distributed import DistributedSampler
 from pytorch_pretrained_bert.tokenization import whitespace_tokenize, BasicTokenizer, BertTokenizer
 from pytorch_pretrained_bert.modeling import BertForQuestionAnswering
-from pytorch_pretrained_bert.optimization import BertAdam
+from pytorch_pretrained_bert.optimization import BertAdam, warmup_linear
 from pytorch_pretrained_bert.file_utils import PYTORCH_PRETRAINED_BERT_CACHE

 logging.basicConfig(format='%(asctime)s - %(levelname)s - %(name)s - %(message)s',
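The only change in this hunk is the additional warmup_linear import, which the fp16 training path further down uses to scale the learning rate by hand. As a rough sketch only (the exact implementation in pytorch_pretrained_bert.optimization may differ), a schedule of this shape typically ramps the multiplier up linearly during warmup and then decays it linearly:

    def warmup_linear(x, warmup=0.002):
        # x is the fraction of training completed (global_step / total optimization steps).
        # Ramp up linearly during the warmup phase, then decay linearly towards 0.
        if x < warmup:
            return x / warmup
        return 1.0 - x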
@@ -785,7 +785,7 @@ def main():
     parser.add_argument("--num_train_epochs", default=3.0, type=float,
                         help="Total number of training epochs to perform.")
     parser.add_argument("--warmup_proportion", default=0.1, type=float,
-                        help="Proportion of training to perform linear learning rate warmup for. E.g., 0.1 = 10% "
+                        help="Proportion of training to perform linear learning rate warmup for. E.g., 0.1 = 10%% "
                              "of training.")
     parser.add_argument("--n_best_size", default=20, type=int,
                         help="The total number of n-best predictions to generate in the nbest_predictions.json "
@@ -873,12 +873,14 @@ def main():
     tokenizer = BertTokenizer.from_pretrained(args.bert_model, do_lower_case=args.do_lower_case)

     train_examples = None
-    num_train_steps = None
+    num_train_optimization_steps = None
     if args.do_train:
         train_examples = read_squad_examples(
             input_file=args.train_file, is_training=True, version_2_with_negative=args.version_2_with_negative)
-        num_train_steps = int(
-            len(train_examples) / args.train_batch_size / args.gradient_accumulation_steps * args.num_train_epochs)
+        num_train_optimization_steps = int(
+            len(train_dataset) / args.train_batch_size / args.gradient_accumulation_steps) * args.num_train_epochs
+        if args.local_rank != -1:
+            num_train_optimization_steps = num_train_optimization_steps // torch.distributed.get_world_size()

     # Prepare model
     model = BertForQuestionAnswering.from_pretrained(args.bert_model,
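For concreteness, a small worked example of the step arithmetic introduced here; the numbers are made up, and the length term simply counts training examples as elsewhere in the script:

    # Hypothetical numbers, for illustration only.
    num_examples = 1000                  # length of the training set
    train_batch_size = 32
    gradient_accumulation_steps = 2
    num_train_epochs = 3.0

    # int(1000 / 32 / 2) * 3.0 = 15 * 3.0 = 45.0 optimizer updates in total
    num_train_optimization_steps = int(
        num_examples / train_batch_size / gradient_accumulation_steps) * num_train_epochs

    # Under distributed training, each of (say) 4 workers performs only its share of the updates.
    world_size = 4
    num_train_optimization_steps = num_train_optimization_steps // world_size  # 45.0 // 4 = 11.0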
@@ -910,9 +912,6 @@ def main():
         {'params': [p for n, p in param_optimizer if any(nd in n for nd in no_decay)], 'weight_decay': 0.0}
         ]

-    t_total = num_train_steps
-    if args.local_rank != -1:
-        t_total = t_total // torch.distributed.get_world_size()
     if args.fp16:
         try:
             from apex.optimizer import FP16_Optimizer
@@ -932,7 +931,7 @@ def main():
         optimizer = BertAdam(optimizer_grouped_parameters,
                              lr=args.learning_rate,
                              warmup=args.warmup_proportion,
-                             t_total=t_total)
+                             t_total=num_train_optimization_steps)

     global_step = 0
     if args.do_train:
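The separate t_total bookkeeping is no longer needed because num_train_optimization_steps is already divided by the world size where it is computed, so it is handed to BertAdam directly. As I understand pytorch_pretrained_bert.optimization (treat the internals as an assumption, not the library's exact code), passing both warmup and t_total makes BertAdam scale the learning rate itself, roughly like this hypothetical helper:

    def bert_adam_effective_lr(base_lr, step, t_total, warmup_proportion):
        # Sketch of the learning rate BertAdam is expected to apply internally
        # when warmup and t_total are provided; not copied from the library.
        progress = step / t_total
        if progress < warmup_proportion:
            return base_lr * progress / warmup_proportion
        return base_lr * (1.0 - progress)

    print(bert_adam_effective_lr(3e-5, step=50, t_total=1000, warmup_proportion=0.1))   # warmup phase
    print(bert_adam_effective_lr(3e-5, step=500, t_total=1000, warmup_proportion=0.1))  # decay phase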
@@ -958,7 +957,7 @@ def main():
         logger.info("  Num orig examples = %d", len(train_examples))
         logger.info("  Num split examples = %d", len(train_features))
         logger.info("  Batch size = %d", args.train_batch_size)
-        logger.info("  Num steps = %d", num_train_steps)
+        logger.info("  Num steps = %d", num_train_optimization_steps)
         all_input_ids = torch.tensor([f.input_ids for f in train_features], dtype=torch.long)
         all_input_mask = torch.tensor([f.input_mask for f in train_features], dtype=torch.long)
         all_segment_ids = torch.tensor([f.segment_ids for f in train_features], dtype=torch.long)
@@ -989,10 +988,12 @@ def main():
                 else:
                     loss.backward()
                 if (step + 1) % args.gradient_accumulation_steps == 0:
-                    # modify learning rate with special warm up BERT uses
-                    lr_this_step = args.learning_rate * warmup_linear(global_step / t_total, args.warmup_proportion)
-                    for param_group in optimizer.param_groups:
-                        param_group['lr'] = lr_this_step
+                    if args.fp16:
+                        # modify learning rate with special warm up BERT uses
+                        # if args.fp16 is False, BertAdam is used and handles this automatically
+                        lr_this_step = args.learning_rate * warmup_linear(global_step / num_train_optimization_steps, args.warmup_proportion)
+                        for param_group in optimizer.param_groups:
+                            param_group['lr'] = lr_this_step
                     optimizer.step()
                     optimizer.zero_grad()
                     global_step += 1
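One detail worth noting in this hunk: optimizer.step() and global_step += 1 only run every gradient_accumulation_steps batches, so the warmup fraction fed to warmup_linear is measured in optimizer updates, not mini-batches. A tiny standalone sketch of that counting, with hypothetical numbers:

    gradient_accumulation_steps = 2   # hypothetical
    num_batches = 10

    global_step = 0
    for step in range(num_batches):
        # ... forward pass, loss / gradient_accumulation_steps, backward pass ...
        if (step + 1) % gradient_accumulation_steps == 0:
            # optimizer.step(); optimizer.zero_grad()
            global_step += 1

    print(global_step)  # 5 optimizer updates for 10 batches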
@@ -1002,10 +1003,12 @@ def main():
     output_model_file = os.path.join(args.output_dir, "pytorch_model.bin")
     if args.do_train:
         torch.save(model_to_save.state_dict(), output_model_file)

-    # Load a trained model that you have fine-tuned
-    model_state_dict = torch.load(output_model_file)
-    model = BertForQuestionAnswering.from_pretrained(args.bert_model, state_dict=model_state_dict)
+        # Load a trained model that you have fine-tuned
+        model_state_dict = torch.load(output_model_file)
+        model = BertForQuestionAnswering.from_pretrained(args.bert_model, state_dict=model_state_dict)
+    else:
+        model = BertForQuestionAnswering.from_pretrained(args.bert_model)
     model.to(device)

     if args.do_predict and (args.local_rank == -1 or torch.distributed.get_rank() == 0):
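After training, the script reloads the weights it just wrote to disk before running prediction; the new else: branch means a prediction-only run (--do_predict without --do_train) falls back to the stock pretrained checkpoint instead of trying to read a file that was never written. A minimal sketch of the same save/reload round trip, with the paths and model class treated as placeholders rather than the script's exact code:

    import os
    import torch

    def save_and_reload(model, model_cls, bert_model_name, output_dir, do_train):
        # Sketch only: mirrors the pattern in run_squad.py, not the exact script code.
        output_model_file = os.path.join(output_dir, "pytorch_model.bin")
        if do_train:
            torch.save(model.state_dict(), output_model_file)
            # Reload the fine-tuned weights so evaluation uses exactly what was saved.
            state_dict = torch.load(output_model_file)
            return model_cls.from_pretrained(bert_model_name, state_dict=state_dict)
        # Prediction-only run: start from the stock pretrained weights.
        return model_cls.from_pretrained(bert_model_name)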