chenpangpang/transformers
Commit e61db0d1 authored Nov 02, 2018 by thomwolf

run_squad WIP

Parent: c0065af6
Showing 3 changed files with 76 additions and 41 deletions:
extract_features_pytorch.py  (+2, -2)
modeling_pytorch.py          (+3, -3)
run_squad_pytorch.py         (+71, -36)
extract_features_pytorch.py
@@ -316,7 +316,7 @@ def read_examples(input_file):
     return examples
 
 
-def main(_):
+def main():
     tf.logging.set_verbosity(tf.logging.INFO)
 
     layer_indexes = [int(x) for x in args.layers.split(",")]
@@ -387,4 +387,4 @@ def main(_):
 
 
 if __name__ == "__main__":
-    tf.app.run()
+    main()
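A note on the entrypoint change above: `tf.app.run()` parses TensorFlow's flags and then calls `main(argv)`, which is why the TF-style signature took a throwaway `_` argument. Once argparse owns the arguments, the script can call a zero-argument `main()` directly. A minimal sketch of the new pattern (illustrative only; the `--layers` flag and its default here are assumptions, not copied from this file):

```python
import argparse

# argparse replaces tf.flags: arguments are parsed at module level,
# so main() no longer needs the unused `_` that tf.app.run() passed in.
parser = argparse.ArgumentParser()
parser.add_argument("--layers", default="-1,-2,-3,-4", type=str)  # hypothetical flag
args = parser.parse_args()

def main():
    # Same parsing idiom as the diff: a comma-separated flag into ints.
    layer_indexes = [int(x) for x in args.layers.split(",")]
    print(layer_indexes)

if __name__ == "__main__":
    main()  # called directly instead of via tf.app.run()
```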
modeling_pytorch.py
@@ -441,8 +441,8 @@ class BertForSequenceClassification(nn.Module):
 class BertForQuestionAnswering(nn.Module):
     """BERT model for Question Answering (span extraction).
 
-    This module is composed of the BERT model with linear layers on top of
-    the sequence output.
+    This module is composed of the BERT model with a linear layer on top of
+    the sequence output that computes start_logits and end_logits
 
     Example usage:
     ```python
@@ -455,7 +455,7 @@ class BertForQuestionAnswering(nn.Module):
         num_hidden_layers=8, num_attention_heads=6, intermediate_size=1024)
 
     model = BertForQuestionAnswering(config)
-    logits = model(input_ids, token_type_ids, input_mask)
+    start_logits, end_logits = model(input_ids, token_type_ids, input_mask)
     ```
     """
     def __init__(self, config):
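Per the updated docstring, the forward pass now returns a pair of tensors rather than a single `logits`. A minimal sketch of the documented call (tensor values are toy data; the first line of the `BertConfig` call is an assumption since the diff truncates it, and the head count is adjusted to divide `hidden_size` so the sketch actually runs):

```python
import torch
from modeling_pytorch import BertConfig, BertForQuestionAnswering

# Toy batch of 2 sequences (token ids, segment ids, attention mask).
input_ids = torch.LongTensor([[31, 51, 99], [15, 5, 0]])
token_type_ids = torch.LongTensor([[0, 0, 1], [0, 1, 0]])
input_mask = torch.LongTensor([[1, 1, 1], [1, 1, 0]])

# Config values loosely follow the docstring excerpt above;
# num_attention_heads=8 (not 6) so that hidden_size divides evenly.
config = BertConfig(vocab_size=32000, hidden_size=512,
    num_hidden_layers=8, num_attention_heads=8, intermediate_size=1024)

model = BertForQuestionAnswering(config)

# Two tensors of shape (batch_size, seq_len): per-token scores for the
# answer span's start and end positions.
start_logits, end_logits = model(input_ids, token_type_ids, input_mask)
```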
run_squad_pytorch.py
@@ -19,6 +19,7 @@ from __future__ import division
 from __future__ import print_function
 
 import collections
+import logging
 import json
 import math
 import os
@@ -29,6 +30,14 @@ import six
 import tensorflow as tf
 import argparse
 
+from modeling_pytorch import BertConfig, BertForQuestionAnswering
+from optimization_pytorch import BERTAdam
+
+logging.basicConfig(format='%(asctime)s - %(levelname)s - %(name)s - %(message)s',
+                    datefmt='%m/%d/%Y %H:%M:%S',
+                    level=logging.INFO)
+logger = logging.getLogger(__name__)
+
 parser = argparse.ArgumentParser()
 
 ## Required parameters
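With the root logger configured above, subsequent code would typically report through the named `logger` rather than bare `print`; for instance (an illustration, not a line from this commit):

```python
# Emits e.g. "11/02/2018 10:00:00 - INFO - __main__ - device cuda n_gpu 1"
logger.info("device %s n_gpu %d", device, n_gpu)
```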
@@ -94,6 +103,10 @@ parser.add_argument("--num_tpu_cores", default=8, type=int, help="Only used if `
 parser.add_argument("--verbose_logging", default=False, type=bool,
                     help="If true, all of the warnings related to data processing will be printed. "
                          "A number of warnings are expected for a normal SQuAD evaluation.")
+parser.add_argument("--local_rank",
+                    type=int,
+                    default=-1,
+                    help="local_rank for distributed training on gpus")
 
 args = parser.parse_args()
@@ -926,8 +939,15 @@ def _compute_softmax(scores):
     return probs
 
 
-def main(_):
-    tf.logging.set_verbosity(tf.logging.INFO)
+def main():
+    if args.local_rank == -1 or args.no_cuda:
+        device = torch.device("cuda" if torch.cuda.is_available() and not args.no_cuda else "cpu")
+        n_gpu = torch.cuda.device_count()
+    else:
+        device = torch.device("cuda", args.local_rank)
+        n_gpu = 1
+        # print("Initializing the distributed backend: NCCL")
+    print("device", device, "n_gpu", n_gpu)
 
     if not args.do_train and not args.do_predict:
         raise ValueError("At least one of `do_train` or `do_predict` must be True.")
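For context on the branch above: `--local_rank` defaults to -1 (single-process mode, added earlier in this diff) and is set to the worker's GPU index by PyTorch's distributed launcher, one process per GPU. A standalone sketch of the same selection logic (the helper name is ours, not the script's):

```python
import torch

def select_device(local_rank=-1, no_cuda=False):
    """Mirror of the device logic in run_squad_pytorch.py's main()."""
    if local_rank == -1 or no_cuda:
        # Single-process mode: grab every visible GPU, or fall back to CPU.
        device = torch.device("cuda" if torch.cuda.is_available() and not no_cuda else "cpu")
        n_gpu = torch.cuda.device_count()
    else:
        # Distributed mode: this process is pinned to one GPU.
        device = torch.device("cuda", local_rank)
        n_gpu = 1
    return device, n_gpu

device, n_gpu = select_device()
print("device", device, "n_gpu", n_gpu)
```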
@@ -941,7 +961,7 @@ def main(_):
         raise ValueError(
             "If `do_predict` is True, then `predict_file` must be specified.")
 
-    bert_config = modeling.BertConfig.from_json_file(args.bert_config_file)
+    bert_config = BertConfig.from_json_file(args.bert_config_file)
 
     if args.max_seq_length > bert_config.max_position_embeddings:
         raise ValueError(
@@ -949,54 +969,69 @@ def main(_):
             "was only trained up to sequence length %d" %
             (args.max_seq_length, bert_config.max_position_embeddings))
 
-    tf.gfile.MakeDirs(args.output_dir)
+    if os.path.exists(args.output_dir) and os.listdir(args.output_dir):
+        raise ValueError(f"Output directory ({args.output_dir}) already exists and is "
+                         f"not empty.")
+    os.makedirs(args.output_dir, exist_ok=True)
 
     tokenizer = tokenization.FullTokenizer(
         vocab_file=args.vocab_file, do_lower_case=args.do_lower_case)
 
-    tpu_cluster_resolver = None
-    if args.use_tpu and args.tpu_name:
-        tpu_cluster_resolver = tf.contrib.cluster_resolver.TPUClusterResolver(
-            args.tpu_name, zone=args.tpu_zone, project=args.gcp_project)
-
-    is_per_host = tf.contrib.tpu.InputPipelineConfig.PER_HOST_V2
-    run_config = tf.contrib.tpu.RunConfig(
-        cluster=tpu_cluster_resolver,
-        master=args.master,
-        model_dir=args.output_dir,
-        save_checkpoints_steps=args.save_checkpoints_steps,
-        tpu_config=tf.contrib.tpu.TPUConfig(
-            iterations_per_loop=args.iterations_per_loop,
-            num_shards=args.num_tpu_cores,
-            per_host_input_for_training=is_per_host))
+    # tpu_cluster_resolver = None
+    # if args.use_tpu and args.tpu_name:
+    #     tpu_cluster_resolver = tf.contrib.cluster_resolver.TPUClusterResolver(
+    #         args.tpu_name, zone=args.tpu_zone, project=args.gcp_project)
+
+    # is_per_host = tf.contrib.tpu.InputPipelineConfig.PER_HOST_V2
+    # run_config = tf.contrib.tpu.RunConfig(
+    #     cluster=tpu_cluster_resolver,
+    #     master=args.master,
+    #     model_dir=args.output_dir,
+    #     save_checkpoints_steps=args.save_checkpoints_steps,
+    #     tpu_config=tf.contrib.tpu.TPUConfig(
+    #         iterations_per_loop=args.iterations_per_loop,
+    #         num_shards=args.num_tpu_cores,
+    #         per_host_input_for_training=is_per_host))
 
     train_examples = None
     num_train_steps = None
-    num_warmup_steps = None
+    # num_warmup_steps = None
     if args.do_train:
         train_examples = read_squad_examples(
             input_file=args.train_file, is_training=True)
         num_train_steps = int(
             len(train_examples) / args.train_batch_size * args.num_train_epochs)
-        num_warmup_steps = int(num_train_steps * args.warmup_proportion)
-
-    model_fn = model_fn_builder(
-        bert_config=bert_config,
-        init_checkpoint=args.init_checkpoint,
-        learning_rate=args.learning_rate,
-        num_train_steps=num_train_steps,
-        num_warmup_steps=num_warmup_steps,
-        use_tpu=args.use_tpu,
-        use_one_hot_embeddings=args.use_tpu)
+        # num_warmup_steps = int(num_train_steps * args.warmup_proportion)
+
+    # model_fn = model_fn_builder(
+    #     bert_config=bert_config,
+    #     init_checkpoint=args.init_checkpoint,
+    #     learning_rate=args.learning_rate,
+    #     num_train_steps=num_train_steps,
+    #     num_warmup_steps=num_warmup_steps,
+    #     use_tpu=args.use_tpu,
+    #     use_one_hot_embeddings=args.use_tpu)
 
     # If TPU is not available, this will fall back to normal Estimator on CPU
     # or GPU.
-    estimator = tf.contrib.tpu.TPUEstimator(
-        use_tpu=args.use_tpu,
-        model_fn=model_fn,
-        config=run_config,
-        train_batch_size=args.train_batch_size,
-        predict_batch_size=args.predict_batch_size)
+    # estimator = tf.contrib.tpu.TPUEstimator(
+    #     use_tpu=args.use_tpu,
+    #     model_fn=model_fn,
+    #     config=run_config,
+    #     train_batch_size=args.train_batch_size,
+    #     predict_batch_size=args.predict_batch_size)
+
+    model = BertForQuestionAnswering(bert_config)
+    if args.init_checkpoint is not None:
+        model.bert.load_state_dict(torch.load(args.init_checkpoint, map_location='cpu'))
+    model.to(device)
+
+    optimizer = BERTAdam([{'params': [p for n, p in model.named_parameters() if n != 'bias'], 'l2': 0.01},
+                          {'params': [p for n, p in model.named_parameters() if n == 'bias'], 'l2': 0.}
+                         ],
+                         lr=args.learning_rate, schedule='warmup_linear',
+                         warmup=args.warmup_proportion,
+                         t_total=num_train_steps)
 
     if args.do_train:
         train_features = convert_examples_to_features(
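A note on the parameter groups passed to `BERTAdam` above: the intent is to apply L2 regularization (`'l2': 0.01`) to weights but not biases. Since `named_parameters()` on nested modules yields dotted names such as `bert.embeddings.word_embeddings.weight`, comparing the full name against the literal `'bias'` only catches a top-level parameter with exactly that name; a suffix test is the usual way to split the groups. A minimal sketch of that pattern with a stock optimizer (`weight_decay` standing in for the `'l2'` key, whose semantics we only know from this diff):

```python
import torch

# Stand-in for the BERT model; any nn.Module with nested parameters works.
model = torch.nn.Sequential(torch.nn.Linear(4, 4), torch.nn.Linear(4, 2))

# named_parameters() gives dotted names like "0.weight" and "1.bias",
# so test the suffix rather than the whole name.
decay, no_decay = [], []
for name, param in model.named_parameters():
    (no_decay if name.endswith("bias") else decay).append(param)

optimizer = torch.optim.Adam(
    [{"params": decay, "weight_decay": 0.01},
     {"params": no_decay, "weight_decay": 0.0}],
    lr=3e-5,
)
```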
@@ -1067,4 +1102,4 @@ def main(_):
 
 
 if __name__ == "__main__":
-    tf.app.run()
+    main()