Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
chenpangpang
transformers
Commits
8e81e5e6
Commit
8e81e5e6
authored
Nov 02, 2018
by
thomwolf
Browse files
working on squad
parent
e61db0d1
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
62 additions
and
66 deletions
+62
-66
run_classifier_pytorch.py
run_classifier_pytorch.py
+1
-1
run_squad_pytorch.py
run_squad_pytorch.py
+61
-65
No files found.
run_classifier_pytorch.py
View file @
8e81e5e6
...
@@ -440,7 +440,7 @@ def main():
...
@@ -440,7 +440,7 @@ def main():
else
:
else
:
device
=
torch
.
device
(
"cuda"
,
args
.
local_rank
)
device
=
torch
.
device
(
"cuda"
,
args
.
local_rank
)
n_gpu
=
1
n_gpu
=
1
print
(
"Initializing the distributed backend: NCCL"
)
#
print("Initializing the distributed backend: NCCL")
print
(
"device"
,
device
,
"n_gpu"
,
n_gpu
)
print
(
"device"
,
device
,
"n_gpu"
,
n_gpu
)
if
not
args
.
do_train
and
not
args
.
do_eval
:
if
not
args
.
do_train
and
not
args
.
do_eval
:
...
...
run_squad_pytorch.py
View file @
8e81e5e6
...
@@ -30,6 +30,9 @@ import six
...
@@ -30,6 +30,9 @@ import six
import
tensorflow
as
tf
import
tensorflow
as
tf
import
argparse
import
argparse
from
torch.utils.data
import
TensorDataset
,
DataLoader
,
RandomSampler
,
SequentialSampler
from
torch.utils.data.distributed
import
DistributedSampler
from
modeling_pytorch
import
BertConfig
,
BertForQuestionAnswering
from
modeling_pytorch
import
BertConfig
,
BertForQuestionAnswering
from
optimization_pytorch
import
BERTAdam
from
optimization_pytorch
import
BERTAdam
...
@@ -977,49 +980,13 @@ def main():
...
@@ -977,49 +980,13 @@ def main():
tokenizer
=
tokenization
.
FullTokenizer
(
tokenizer
=
tokenization
.
FullTokenizer
(
vocab_file
=
args
.
vocab_file
,
do_lower_case
=
args
.
do_lower_case
)
vocab_file
=
args
.
vocab_file
,
do_lower_case
=
args
.
do_lower_case
)
# tpu_cluster_resolver = None
# if args.use_tpu and args.tpu_name:
# tpu_cluster_resolver = tf.contrib.cluster_resolver.TPUClusterResolver(
# args.tpu_name, zone=args.tpu_zone, project=args.gcp_project)
# is_per_host = tf.contrib.tpu.InputPipelineConfig.PER_HOST_V2
# run_config = tf.contrib.tpu.RunConfig(
# cluster=tpu_cluster_resolver,
# master=args.master,
# model_dir=args.output_dir,
# save_checkpoints_steps=args.save_checkpoints_steps,
# tpu_config=tf.contrib.tpu.TPUConfig(
# iterations_per_loop=args.iterations_per_loop,
# num_shards=args.num_tpu_cores,
# per_host_input_for_training=is_per_host))
train_examples
=
None
train_examples
=
None
num_train_steps
=
None
num_train_steps
=
None
# num_warmup_steps = None
if
args
.
do_train
:
if
args
.
do_train
:
train_examples
=
read_squad_examples
(
train_examples
=
read_squad_examples
(
input_file
=
args
.
train_file
,
is_training
=
True
)
input_file
=
args
.
train_file
,
is_training
=
True
)
num_train_steps
=
int
(
num_train_steps
=
int
(
len
(
train_examples
)
/
args
.
train_batch_size
*
args
.
num_train_epochs
)
len
(
train_examples
)
/
args
.
train_batch_size
*
args
.
num_train_epochs
)
# num_warmup_steps = int(num_train_steps * args.warmup_proportion)
# model_fn = model_fn_builder(
# bert_config=bert_config,
# init_checkpoint=args.init_checkpoint,
# learning_rate=args.learning_rate,
# num_train_steps=num_train_steps,
# num_warmup_steps=num_warmup_steps,
# use_tpu=args.use_tpu,
# use_one_hot_embeddings=args.use_tpu)
# If TPU is not available, this will fall back to normal Estimator on CPU
# or GPU.
# estimator = tf.contrib.tpu.TPUEstimator(
# use_tpu=args.use_tpu,
# model_fn=model_fn,
# config=run_config,
# train_batch_size=args.train_batch_size,
# predict_batch_size=args.predict_batch_size)
model
=
BertForQuestionAnswering
(
bert_config
)
model
=
BertForQuestionAnswering
(
bert_config
)
if
args
.
init_checkpoint
is
not
None
:
if
args
.
init_checkpoint
is
not
None
:
...
@@ -1041,17 +1008,36 @@ def main():
...
@@ -1041,17 +1008,36 @@ def main():
doc_stride
=
args
.
doc_stride
,
doc_stride
=
args
.
doc_stride
,
max_query_length
=
args
.
max_query_length
,
max_query_length
=
args
.
max_query_length
,
is_training
=
True
)
is_training
=
True
)
tf
.
logging
.
info
(
"***** Running training *****"
)
logger
.
info
(
"***** Running training *****"
)
tf
.
logging
.
info
(
" Num orig examples = %d"
,
len
(
train_examples
))
logger
.
info
(
" Num orig examples = %d"
,
len
(
train_examples
))
tf
.
logging
.
info
(
" Num split examples = %d"
,
len
(
train_features
))
logger
.
info
(
" Num split examples = %d"
,
len
(
train_features
))
tf
.
logging
.
info
(
" Batch size = %d"
,
args
.
train_batch_size
)
logger
.
info
(
" Batch size = %d"
,
args
.
train_batch_size
)
tf
.
logging
.
info
(
" Num steps = %d"
,
num_train_steps
)
logger
.
info
(
" Num steps = %d"
,
num_train_steps
)
train_input_fn
=
input_fn_builder
(
features
=
train_features
,
all_input_ids
=
torch
.
tensor
([
f
.
input_ids
for
f
in
train_features
],
dtype
=
torch
.
long
)
seq_length
=
args
.
max_seq_length
,
all_input_mask
=
torch
.
tensor
([
f
.
input_mask
for
f
in
train_features
],
dtype
=
torch
.
long
)
is_training
=
True
,
all_segment_ids
=
torch
.
tensor
([
f
.
segment_ids
for
f
in
train_features
],
dtype
=
torch
.
long
)
drop_remainder
=
True
)
all_label_ids
=
torch
.
tensor
([
f
.
label_id
for
f
in
train_features
],
dtype
=
torch
.
long
)
estimator
.
train
(
input_fn
=
train_input_fn
,
max_steps
=
num_train_steps
)
train_data
=
TensorDataset
(
all_input_ids
,
all_input_mask
,
all_segment_ids
,
all_label_ids
)
if
args
.
local_rank
==
-
1
:
train_sampler
=
RandomSampler
(
train_data
)
else
:
train_sampler
=
DistributedSampler
(
train_data
)
train_dataloader
=
DataLoader
(
train_data
,
sampler
=
train_sampler
,
batch_size
=
args
.
train_batch_size
)
model
.
train
()
for
epoch
in
args
.
num_train_epochs
:
for
input_ids
,
input_mask
,
segment_ids
,
label_ids
in
train_dataloader
:
input_ids
=
input_ids
.
to
(
device
)
input_mask
=
input_mask
.
float
().
to
(
device
)
segment_ids
=
segment_ids
.
to
(
device
)
label_ids
=
label_ids
.
to
(
device
)
loss
,
_
=
model
(
input_ids
,
segment_ids
,
input_mask
,
label_ids
)
loss
.
backward
()
optimizer
.
step
()
global_step
+=
1
if
args
.
do_predict
:
if
args
.
do_predict
:
eval_examples
=
read_squad_examples
(
eval_examples
=
read_squad_examples
(
...
@@ -1064,29 +1050,39 @@ def main():
...
@@ -1064,29 +1050,39 @@ def main():
max_query_length
=
args
.
max_query_length
,
max_query_length
=
args
.
max_query_length
,
is_training
=
False
)
is_training
=
False
)
tf
.
logg
ing
.
info
(
"***** Running predictions *****"
)
logg
er
.
info
(
"***** Running predictions *****"
)
tf
.
logg
ing
.
info
(
" Num orig examples = %d"
,
len
(
eval_examples
))
logg
er
.
info
(
" Num orig examples = %d"
,
len
(
eval_examples
))
tf
.
logg
ing
.
info
(
" Num split examples = %d"
,
len
(
eval_features
))
logg
er
.
info
(
" Num split examples = %d"
,
len
(
eval_features
))
tf
.
logg
ing
.
info
(
" Batch size = %d"
,
args
.
predict_batch_size
)
logg
er
.
info
(
" Batch size = %d"
,
args
.
predict_batch_size
)
all_results
=
[]
all_input_ids
=
torch
.
tensor
([
f
.
input_ids
for
f
in
eval_features
],
dtype
=
torch
.
long
)
all_input_mask
=
torch
.
tensor
([
f
.
input_mask
for
f
in
eval_features
],
dtype
=
torch
.
long
)
all_segment_ids
=
torch
.
tensor
([
f
.
segment_ids
for
f
in
eval_features
],
dtype
=
torch
.
long
)
all_label_ids
=
torch
.
tensor
([
f
.
label_id
for
f
in
eval_features
],
dtype
=
torch
.
long
)
all_example_index
=
torch
.
arange
(
all_input_ids
.
size
(
0
),
dtype
=
torch
.
long
)
predict_input_fn
=
input_fn_builder
(
eval_data
=
TensorDataset
(
all_input_ids
,
all_input_mask
,
all_segment_ids
,
all_label_ids
,
all_example_index
)
features
=
eval_features
,
if
args
.
local_rank
==
-
1
:
seq_length
=
args
.
max_seq_length
,
eval_sampler
=
SequentialSampler
(
eval_data
)
is_training
=
False
,
else
:
drop_remainder
=
False
)
eval_sampler
=
DistributedSampler
(
eval_data
)
eval_dataloader
=
DataLoader
(
eval_data
,
sampler
=
eval_sampler
,
batch_size
=
args
.
eval_batch_size
)
# If running eval on the TPU, you will need to specify the number of
model
.
eval
()
# steps.
all_results
=
[]
all_results
=
[]
for
result
in
estimator
.
predict
(
for
input_ids
,
input_mask
,
segment_ids
,
label_ids
,
example_index
in
eval_dataloader
:
predict_input_fn
,
yield_single_examples
=
True
):
if
len
(
all_results
)
%
1000
==
0
:
if
len
(
all_results
)
%
1000
==
0
:
tf
.
logging
.
info
(
"Processing example: %d"
%
(
len
(
all_results
)))
logger
.
info
(
"Processing example: %d"
%
(
len
(
all_results
)))
unique_id
=
int
(
result
[
"unique_ids"
])
start_logits
=
[
float
(
x
)
for
x
in
result
[
"start_logits"
].
flat
]
input_ids
=
input_ids
.
to
(
device
)
end_logits
=
[
float
(
x
)
for
x
in
result
[
"end_logits"
].
flat
]
input_mask
=
input_mask
.
float
().
to
(
device
)
segment_ids
=
segment_ids
.
to
(
device
)
start_logits
,
end_logits
=
model
(
input_ids
,
segment_ids
,
input_mask
)
unique_id
=
[
int
(
eval_features
[
e
.
item
()].
unique_id
)
for
e
in
example_index
]
start_logits
=
[
x
.
item
()
for
x
in
start_logits
]
end_logits
=
[
x
.
item
()
for
x
in
end_logits
]
all_results
.
append
(
all_results
.
append
(
RawResult
(
RawResult
(
unique_id
=
unique_id
,
unique_id
=
unique_id
,
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment