chenpangpang / transformers

Commit 68ab9599, authored Jun 19, 2019 by thomwolf
small fix and updates to readme
Parent: f7e2ac01
Showing 5 changed files with 53 additions and 18 deletions.
README.md                               +20  -4
examples/bertology.py                   +20  -11
examples/run_classifier.py              +5   -1
examples/run_squad.py                   +4   -0
pytorch_pretrained_bert/modeling.py     +4   -2
README.md

@@ -1322,12 +1322,14 @@ python $SQUAD_DIR/evaluate-v1.1.py $SQUAD_DIR/dev-v1.1.json /tmp/debug_squad/pre
 {"f1": 88.52381567990474, "exact_match": 81.22043519394512}
 ```
 
-Here is an example using distributed training on 8 V100 GPUs and Bert Whole Word Masking model to reach a F1 > 93 on SQuAD:
+**distributed training**
+
+Here is an example using distributed training on 8 V100 GPUs and Bert Whole Word Masking uncased model to reach a F1 > 93 on SQuAD:
 
 ```bash
 python -m torch.distributed.launch --nproc_per_node=8 \
  run_squad.py \
-  --bert_model bert-large-cased-whole-word-masking \
+  --bert_model bert-large-uncased-whole-word-masking \
   --do_train \
   --do_predict \
   --do_lower_case \
@@ -1337,17 +1339,31 @@ python -m torch.distributed.launch --nproc_per_node=8 \
   --num_train_epochs 2 \
   --max_seq_length 384 \
   --doc_stride 128 \
-  --output_dir ../models/train_squad_large_cased_wwm/ \
+  --output_dir ../models/wwm_uncased_finetuned_squad/ \
   --train_batch_size 24 \
   --gradient_accumulation_steps 12
 ```
 
 Training with these hyper-parameters gave us the following results:
 
 ```bash
-python $SQUAD_DIR/evaluate-v1.1.py $SQUAD_DIR/dev-v1.1.json ../models/train_squad_large_cased_wwm/predictions.json
+python $SQUAD_DIR/evaluate-v1.1.py $SQUAD_DIR/dev-v1.1.json ../models/wwm_uncased_finetuned_squad/predictions.json
 {"exact_match": 86.91579943235573, "f1": 93.1532499015869}
 ```
 
+This is the model provided as `bert-large-uncased-whole-word-masking-finetuned-squad`.
+
+And here is the model provided as `bert-large-cased-whole-word-masking-finetuned-squad`:
+
+```bash
+python -m torch.distributed.launch --nproc_per_node=8 run_squad.py --bert_model bert-large-cased-whole-word-masking --do_train --do_predict --do_lower_case --train_file $SQUAD_DIR/train-v1.1.json --predict_file $SQUAD_DIR/dev-v1.1.json --learning_rate 3e-5 --num_train_epochs 2 --max_seq_length 384 --doc_stride 128 --output_dir ../models/wwm_cased_finetuned_squad/ --train_batch_size 24 --gradient_accumulation_steps 12
+```
+
+Training with these hyper-parameters gave us the following results:
+
+```bash
+python $SQUAD_DIR/evaluate-v1.1.py $SQUAD_DIR/dev-v1.1.json ../models/wwm_uncased_finetuned_squad/predictions.json
+{"exact_match": 84.18164616840113, "f1": 91.58645594850135}
+```
+
 #### SWAG
 
 The data for SWAG can be downloaded by cloning the following [repository](https://github.com/rowanz/swagaf)
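Once the archive-map entries added in `pytorch_pretrained_bert/modeling.py` (below) are in place, the two checkpoints named in this README section can be loaded directly by their shortcut names. A minimal inference sketch under that assumption; the question/context strings and the plain `[CLS]`/`[SEP]` packing are illustrative only, since run_squad.py does the real feature conversion:

```python
import torch
from pytorch_pretrained_bert import BertTokenizer, BertForQuestionAnswering

# Load the SQuAD-fine-tuned whole-word-masking checkpoint by its shortcut name.
model_name = 'bert-large-uncased-whole-word-masking-finetuned-squad'
tokenizer = BertTokenizer.from_pretrained(model_name, do_lower_case=True)
model = BertForQuestionAnswering.from_pretrained(model_name)
model.eval()

# Illustrative inputs packed as [CLS] question [SEP] context [SEP].
question = "What does the fine-tuned model predict?"
context = "The model predicts the start and end positions of the answer span."
q_tokens, c_tokens = tokenizer.tokenize(question), tokenizer.tokenize(context)
tokens = ['[CLS]'] + q_tokens + ['[SEP]'] + c_tokens + ['[SEP]']
segment_ids = [0] * (len(q_tokens) + 2) + [1] * (len(c_tokens) + 1)
input_ids = torch.tensor([tokenizer.convert_tokens_to_ids(tokens)])
token_type_ids = torch.tensor([segment_ids])

with torch.no_grad():
    # Without start/end positions, BertForQuestionAnswering returns (start_logits, end_logits).
    start_logits, end_logits = model(input_ids, token_type_ids=token_type_ids)
start = start_logits.argmax(dim=-1).item()
end = end_logits.argmax(dim=-1).item()
print(' '.join(tokens[start:end + 1]))
```

The cased checkpoint works the same way with `do_lower_case=False`.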
examples/bertology.py

@@ -8,7 +8,7 @@ import torch
 import torch.nn.functional as F
 import numpy as np
-from pytorch_pretrained_bert import BertModel, BertTokenizer
+from pytorch_pretrained_bert import BertForSequenceClassification, BertTokenizer
 
 logging.basicConfig(format = '%(asctime)s - %(levelname)s - %(name)s - %(message)s',
                     datefmt = '%m/%d/%Y %H:%M:%S',
@@ -17,24 +17,33 @@ logger = logging.getLogger(__name__)
 
 def run_model():
     parser = argparse.ArgumentParser()
     parser.add_argument('--model_name_or_path', type=str, default='bert-base-uncased',
                         help='pretrained model name or path to local checkpoint')
     parser.add_argument("--seed", type=int, default=42)
+    parser.add_argument("--local_rank", type=int, default=-1,
+                        help="local_rank for distributed training on gpus")
+    parser.add_argument("--no_cuda", action='store_true',
+                        help="Whether not to use CUDA when available")
     args = parser.parse_args()
-    print(args)
-    if args.batch_size == -1:
-        args.batch_size = 1
-    assert args.nsamples % args.batch_size == 0
 
     np.random.seed(args.seed)
     torch.random.manual_seed(args.seed)
     torch.cuda.manual_seed(args.seed)
-    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+    if args.local_rank == -1 or args.no_cuda:
+        args.device = torch.device("cuda" if torch.cuda.is_available() and not args.no_cuda else "cpu")
+        n_gpu = torch.cuda.device_count()
+    else:
+        torch.cuda.set_device(args.local_rank)
+        args.device = torch.device("cuda", args.local_rank)
+        n_gpu = 1
+        # Initializes the distributed backend which will take care of sychronizing nodes/GPUs
+        torch.distributed.init_process_group(backend='nccl')
+
+    logging.basicConfig(level = logging.INFO if args.local_rank in [-1, 0] else logging.WARN)
+    logger.info("device: {} n_gpu: {}, distributed training: {}, 16-bits training: {}".format(
+        args.device, n_gpu, bool(args.local_rank != -1), args.fp16))
 
     tokenizer = BertTokenizer.from_pretrained(args.model_name_or_path)
-    model = BertModel.from_pretrained(args.model_name_or_path)
-    model.to(device)
+    model = BertForSequenceClassification.from_pretrained(args.model_name_or_path)
+    model.to(args.device)
     model.eval()
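The device setup added above follows the same pattern as the example training scripts: run single-process (using all visible GPUs or the CPU) when `--local_rank` is left at -1 or `--no_cuda` is set, otherwise bind each process spawned by `torch.distributed.launch` to one GPU and join the NCCL process group. A condensed, self-contained sketch of that logic; the `select_device` helper name is hypothetical and not part of the script:

```python
import torch

def select_device(local_rank=-1, no_cuda=False):
    """Sketch of the device selection added to bertology.py; returns (device, n_gpu)."""
    if local_rank == -1 or no_cuda:
        # Single-process mode: use CUDA if it is available and not explicitly disabled.
        device = torch.device("cuda" if torch.cuda.is_available() and not no_cuda else "cpu")
        n_gpu = torch.cuda.device_count()
    else:
        # Distributed mode: torch.distributed.launch passes a distinct --local_rank per process.
        torch.cuda.set_device(local_rank)
        device = torch.device("cuda", local_rank)
        n_gpu = 1
        torch.distributed.init_process_group(backend='nccl')  # join the process group
    return device, n_gpu

# Outside the launcher, local_rank stays -1 and this falls back to the CPU or local GPUs.
device, n_gpu = select_device()
print(device, n_gpu)
```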
examples/run_classifier.py

@@ -187,7 +187,7 @@ def main():
     if os.path.exists(args.output_dir) and os.listdir(args.output_dir) and args.do_train and not args.overwrite_output_dir:
         raise ValueError("Output directory ({}) already exists and is not empty.".format(args.output_dir))
-    if not os.path.exists(args.output_dir):
+    if not os.path.exists(args.output_dir) and args.local_rank in [-1, 0]:
         os.makedirs(args.output_dir)
 
     task_name = args.task_name.lower()
@@ -361,6 +361,10 @@ def main():
         # Load a trained model and vocabulary that you have fine-tuned
         model = BertForSequenceClassification.from_pretrained(args.output_dir, num_labels=num_labels)
         tokenizer = BertTokenizer.from_pretrained(args.output_dir, do_lower_case=args.do_lower_case)
+
+        # Good practice: save your training arguments together with the trained model
+        output_args_file = os.path.join(args.output_dir, 'training_args.bin')
+        torch.save(args, output_args_file)
     else:
         model = BertForSequenceClassification.from_pretrained(args.bert_model)
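The `training_args.bin` file written here is simply the pickled `argparse.Namespace`, so it can be reloaded later to see exactly which hyper-parameters produced a given checkpoint; run_squad.py below receives the identical addition. A small sketch, with a placeholder output directory:

```python
import os
import torch

# Hypothetical directory produced by an earlier fine-tuning run.
output_dir = '../models/my_finetuned_model/'

# torch.save(args, ...) pickled the argparse.Namespace; torch.load returns it unchanged.
training_args = torch.load(os.path.join(output_dir, 'training_args.bin'))
print(training_args)                # Namespace(bert_model=..., learning_rate=..., ...)
print(training_args.do_lower_case)  # individual arguments come back as attributes
```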
examples/run_squad.py

@@ -331,6 +331,10 @@ def main():
         # Load a trained model and vocabulary that you have fine-tuned
         model = BertForQuestionAnswering.from_pretrained(args.output_dir)
         tokenizer = BertTokenizer.from_pretrained(args.output_dir, do_lower_case=args.do_lower_case)
+
+        # Good practice: save your training arguments together with the trained model
+        output_args_file = os.path.join(args.output_dir, 'training_args.bin')
+        torch.save(args, output_args_file)
     else:
         model = BertForQuestionAnswering.from_pretrained(args.bert_model)
pytorch_pretrained_bert/modeling.py

@@ -46,8 +46,7 @@ PRETRAINED_MODEL_ARCHIVE_MAP = {
     'bert-large-cased-whole-word-masking': "https://s3.amazonaws.com/models.huggingface.co/bert/bert-large-cased-whole-word-masking-pytorch_model.bin",
     'bert-large-uncased-whole-word-masking-finetuned-squad': "https://s3.amazonaws.com/models.huggingface.co/bert/bert-large-uncased-whole-word-masking-finetuned-squad-pytorch_model.bin",
     'bert-large-cased-whole-word-masking-finetuned-squad': "https://s3.amazonaws.com/models.huggingface.co/bert/bert-large-cased-whole-word-masking-finetuned-squad-pytorch_model.bin",
-    'bert-base-uncased-finetuned-mrpc': "https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-uncased-finetuned-mrpc-pytorch_model.bin",
-    'bert-large-uncased-whole-word-masking-finetuned-mrpc': "https://s3.amazonaws.com/models.huggingface.co/bert/bert-large-uncased-whole-word-masking-finetuned-mrpc-pytorch_model.bin",
+    'bert-base-cased-finetuned-mrpc': "https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-cased-finetuned-mrpc-pytorch_model.bin",
 }
 PRETRAINED_CONFIG_ARCHIVE_MAP = {
     'bert-base-uncased': "https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-uncased-config.json",
@@ -60,6 +59,9 @@ PRETRAINED_CONFIG_ARCHIVE_MAP = {
     'bert-base-german-cased': "https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-german-cased-config.json",
     'bert-large-uncased-whole-word-masking': "https://s3.amazonaws.com/models.huggingface.co/bert/bert-large-uncased-whole-word-masking-config.json",
     'bert-large-cased-whole-word-masking': "https://s3.amazonaws.com/models.huggingface.co/bert/bert-large-cased-whole-word-masking-config.json",
+    'bert-large-uncased-whole-word-masking-finetuned-squad': "https://s3.amazonaws.com/models.huggingface.co/bert/bert-large-uncased-whole-word-masking-finetuned-squad-config.json",
+    'bert-large-cased-whole-word-masking-finetuned-squad': "https://s3.amazonaws.com/models.huggingface.co/bert/bert-large-cased-whole-word-masking-finetuned-squad-config.json",
+    'bert-base-cased-finetuned-mrpc': "https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-cased-finetuned-mrpc-config.json",
 }
 BERT_CONFIG_NAME = 'bert_config.json'
 TF_WEIGHTS_NAME = 'model.ckpt'
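These two module-level dictionaries are what `from_pretrained` consults when it receives a shortcut name rather than a local path, so the new `-finetuned-squad` and `-finetuned-mrpc` entries are what make the model names used in the updated README resolvable. A quick way to inspect which URLs a given name maps to, assuming a package version that includes this commit:

```python
from pytorch_pretrained_bert.modeling import (
    PRETRAINED_MODEL_ARCHIVE_MAP,
    PRETRAINED_CONFIG_ARCHIVE_MAP,
)

name = 'bert-large-cased-whole-word-masking-finetuned-squad'
print(PRETRAINED_MODEL_ARCHIVE_MAP[name])   # URL of the pytorch_model.bin weights
print(PRETRAINED_CONFIG_ARCHIVE_MAP[name])  # URL of the matching config JSON
```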