Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
chenpangpang
transformers
Commits
68ab9599
Commit
68ab9599
authored
Jun 19, 2019
by
thomwolf
Browse files
small fix and updates to readme
parent
f7e2ac01
Changes
5
Hide whitespace changes
Inline
Side-by-side
Showing
5 changed files
with
53 additions
and
18 deletions
+53
-18
README.md
README.md
+20
-4
examples/bertology.py
examples/bertology.py
+20
-11
examples/run_classifier.py
examples/run_classifier.py
+5
-1
examples/run_squad.py
examples/run_squad.py
+4
-0
pytorch_pretrained_bert/modeling.py
pytorch_pretrained_bert/modeling.py
+4
-2
No files found.
README.md
View file @
68ab9599
...
@@ -1322,12 +1322,14 @@ python $SQUAD_DIR/evaluate-v1.1.py $SQUAD_DIR/dev-v1.1.json /tmp/debug_squad/pre
...
@@ -1322,12 +1322,14 @@ python $SQUAD_DIR/evaluate-v1.1.py $SQUAD_DIR/dev-v1.1.json /tmp/debug_squad/pre
{
"f1"
: 88.52381567990474,
"exact_match"
: 81.22043519394512
}
{
"f1"
: 88.52381567990474,
"exact_match"
: 81.22043519394512
}
```
```
Here is an example using distributed training on 8 V100 GPUs and Bert Whole Word Masking model to reach a F1 > 93 on SQuAD:
**distributed training**
Here is an example using distributed training on 8 V100 GPUs and Bert Whole Word Masking uncased model to reach an F1 > 93 on SQuAD:
```
bash
```
bash
python
-m
torch.distributed.launch
--nproc_per_node
=
8
\
python
-m
torch.distributed.launch
--nproc_per_node
=
8
\
run_squad.py
\
run_squad.py
\
--bert_model
bert-large-cased-whole-word-masking
\
--bert_model
bert-large-
un
cased-whole-word-masking
\
--do_train
\
--do_train
\
--do_predict
\
--do_predict
\
--do_lower_case
\
--do_lower_case
\
...
@@ -1337,17 +1339,31 @@ python -m torch.distributed.launch --nproc_per_node=8 \
...
@@ -1337,17 +1339,31 @@ python -m torch.distributed.launch --nproc_per_node=8 \
--num_train_epochs
2
\
--num_train_epochs
2
\
--max_seq_length
384
\
--max_seq_length
384
\
--doc_stride
128
\
--doc_stride
128
\
--output_dir
../models/
train_squad_large_cased_wwm
/
\
--output_dir
../models/
wwm_uncased_finetuned_squad
/
\
--train_batch_size
24
\
--train_batch_size
24
\
--gradient_accumulation_steps
12
--gradient_accumulation_steps
12
```
```
Training with these hyper-parameters gave us the following results:
Training with these hyper-parameters gave us the following results:
```
bash
```
bash
python
$SQUAD_DIR
/evaluate-v1.1.py
$SQUAD_DIR
/dev-v1.1.json ../models/
train_squad_large_cased_wwm
/predictions.json
python
$SQUAD_DIR
/evaluate-v1.1.py
$SQUAD_DIR
/dev-v1.1.json ../models/
wwm_uncased_finetuned_squad
/predictions.json
{
"exact_match"
: 86.91579943235573,
"f1"
: 93.1532499015869
}
{
"exact_match"
: 86.91579943235573,
"f1"
: 93.1532499015869
}
```
```
This is the model provided as
`bert-large-uncased-whole-word-masking-finetuned-squad`
.
And here is the model provided as
`bert-large-cased-whole-word-masking-finetuned-squad`
:
```
bash
python
-m
torch.distributed.launch
--nproc_per_node
=
8 run_squad.py
--bert_model
bert-large-cased-whole-word-masking
--do_train
--do_predict
--do_lower_case
--train_file
$SQUAD_DIR
/train-v1.1.json
--predict_file
$SQUAD_DIR
/dev-v1.1.json
--learning_rate
3e-5
--num_train_epochs
2
--max_seq_length
384
--doc_stride
128
--output_dir
../models/wwm_cased_finetuned_squad/
--train_batch_size
24
--gradient_accumulation_steps
12
```
Training with these hyper-parameters gave us the following results:
```
bash
python
$SQUAD_DIR
/evaluate-v1.1.py
$SQUAD_DIR
/dev-v1.1.json ../models/wwm_cased_finetuned_squad/predictions.json
{
"exact_match"
: 84.18164616840113,
"f1"
: 91.58645594850135
}
```
#### SWAG
#### SWAG
The data for SWAG can be downloaded by cloning the following
[
repository
](
https://github.com/rowanz/swagaf
)
The data for SWAG can be downloaded by cloning the following
[
repository
](
https://github.com/rowanz/swagaf
)
...
...
examples/bertology.py
View file @
68ab9599
...
@@ -8,7 +8,7 @@ import torch
...
@@ -8,7 +8,7 @@ import torch
import
torch.nn.functional
as
F
import
torch.nn.functional
as
F
import
numpy
as
np
import
numpy
as
np
from
pytorch_pretrained_bert
import
Bert
Model
,
BertTokenizer
from
pytorch_pretrained_bert
import
Bert
ForSequenceClassification
,
BertTokenizer
logging
.
basicConfig
(
format
=
'%(asctime)s - %(levelname)s - %(name)s - %(message)s'
,
logging
.
basicConfig
(
format
=
'%(asctime)s - %(levelname)s - %(name)s - %(message)s'
,
datefmt
=
'%m/%d/%Y %H:%M:%S'
,
datefmt
=
'%m/%d/%Y %H:%M:%S'
,
...
@@ -17,24 +17,33 @@ logger = logging.getLogger(__name__)
...
@@ -17,24 +17,33 @@ logger = logging.getLogger(__name__)
def
run_model
():
def
run_model
():
parser
=
argparse
.
ArgumentParser
()
parser
=
argparse
.
ArgumentParser
()
parser
.
add_argument
(
'--model_name_or_path'
,
type
=
str
,
default
=
'bert-base-uncased'
,
parser
.
add_argument
(
'--model_name_or_path'
,
type
=
str
,
default
=
'bert-base-uncased'
,
help
=
'pretrained model name or path to local checkpoint'
)
help
=
'pretrained model name or path to local checkpoint'
)
parser
.
add_argument
(
"--seed"
,
type
=
int
,
default
=
42
)
parser
.
add_argument
(
"--seed"
,
type
=
int
,
default
=
42
)
parser
.
add_argument
(
"--local_rank"
,
type
=
int
,
default
=-
1
,
help
=
"local_rank for distributed training on gpus"
)
parser
.
add_argument
(
"--no_cuda"
,
action
=
'store_true'
,
help
=
"Whether not to use CUDA when available"
)
args
=
parser
.
parse_args
()
args
=
parser
.
parse_args
()
print
(
args
)
if
args
.
batch_size
==
-
1
:
args
.
batch_size
=
1
assert
args
.
nsamples
%
args
.
batch_size
==
0
np
.
random
.
seed
(
args
.
seed
)
np
.
random
.
seed
(
args
.
seed
)
torch
.
random
.
manual_seed
(
args
.
seed
)
torch
.
random
.
manual_seed
(
args
.
seed
)
torch
.
cuda
.
manual_seed
(
args
.
seed
)
torch
.
cuda
.
manual_seed
(
args
.
seed
)
device
=
torch
.
device
(
"cuda"
if
torch
.
cuda
.
is_available
()
else
"cpu"
)
if
args
.
local_rank
==
-
1
or
args
.
no_cuda
:
args
.
device
=
torch
.
device
(
"cuda"
if
torch
.
cuda
.
is_available
()
and
not
args
.
no_cuda
else
"cpu"
)
n_gpu
=
torch
.
cuda
.
device_count
()
else
:
torch
.
cuda
.
set_device
(
args
.
local_rank
)
args
.
device
=
torch
.
device
(
"cuda"
,
args
.
local_rank
)
n_gpu
=
1
# Initializes the distributed backend which will take care of synchronizing nodes/GPUs
torch
.
distributed
.
init_process_group
(
backend
=
'nccl'
)
logging
.
basicConfig
(
level
=
logging
.
INFO
if
args
.
local_rank
in
[
-
1
,
0
]
else
logging
.
WARN
)
logger
.
info
(
"device: {} n_gpu: {}, distributed training: {}, 16-bits training: {}"
.
format
(
args
.
device
,
n_gpu
,
bool
(
args
.
local_rank
!=
-
1
),
args
.
fp16
))
tokenizer
=
BertTokenizer
.
from_pretrained
(
args
.
model_name_or_path
)
tokenizer
=
BertTokenizer
.
from_pretrained
(
args
.
model_name_or_path
)
model
=
Bert
Model
.
from_pretrained
(
args
.
model_name_or_path
)
model
=
Bert
ForSequenceClassification
.
from_pretrained
(
args
.
model_name_or_path
)
model
.
to
(
device
)
model
.
to
(
args
.
device
)
model
.
eval
()
model
.
eval
()
...
...
examples/run_classifier.py
View file @
68ab9599
...
@@ -187,7 +187,7 @@ def main():
...
@@ -187,7 +187,7 @@ def main():
if
os
.
path
.
exists
(
args
.
output_dir
)
and
os
.
listdir
(
args
.
output_dir
)
and
args
.
do_train
and
not
args
.
overwrite_output_dir
:
if
os
.
path
.
exists
(
args
.
output_dir
)
and
os
.
listdir
(
args
.
output_dir
)
and
args
.
do_train
and
not
args
.
overwrite_output_dir
:
raise
ValueError
(
"Output directory ({}) already exists and is not empty."
.
format
(
args
.
output_dir
))
raise
ValueError
(
"Output directory ({}) already exists and is not empty."
.
format
(
args
.
output_dir
))
if
not
os
.
path
.
exists
(
args
.
output_dir
):
if
not
os
.
path
.
exists
(
args
.
output_dir
)
and
args
.
local_rank
in
[
-
1
,
0
]
:
os
.
makedirs
(
args
.
output_dir
)
os
.
makedirs
(
args
.
output_dir
)
task_name
=
args
.
task_name
.
lower
()
task_name
=
args
.
task_name
.
lower
()
...
@@ -361,6 +361,10 @@ def main():
...
@@ -361,6 +361,10 @@ def main():
# Load a trained model and vocabulary that you have fine-tuned
# Load a trained model and vocabulary that you have fine-tuned
model
=
BertForSequenceClassification
.
from_pretrained
(
args
.
output_dir
,
num_labels
=
num_labels
)
model
=
BertForSequenceClassification
.
from_pretrained
(
args
.
output_dir
,
num_labels
=
num_labels
)
tokenizer
=
BertTokenizer
.
from_pretrained
(
args
.
output_dir
,
do_lower_case
=
args
.
do_lower_case
)
tokenizer
=
BertTokenizer
.
from_pretrained
(
args
.
output_dir
,
do_lower_case
=
args
.
do_lower_case
)
# Good practice: save your training arguments together with the trained model
output_args_file
=
os
.
path
.
join
(
args
.
output_dir
,
'training_args.bin'
)
torch
.
save
(
args
,
output_args_file
)
else
:
else
:
model
=
BertForSequenceClassification
.
from_pretrained
(
args
.
bert_model
)
model
=
BertForSequenceClassification
.
from_pretrained
(
args
.
bert_model
)
...
...
examples/run_squad.py
View file @
68ab9599
...
@@ -331,6 +331,10 @@ def main():
...
@@ -331,6 +331,10 @@ def main():
# Load a trained model and vocabulary that you have fine-tuned
# Load a trained model and vocabulary that you have fine-tuned
model
=
BertForQuestionAnswering
.
from_pretrained
(
args
.
output_dir
)
model
=
BertForQuestionAnswering
.
from_pretrained
(
args
.
output_dir
)
tokenizer
=
BertTokenizer
.
from_pretrained
(
args
.
output_dir
,
do_lower_case
=
args
.
do_lower_case
)
tokenizer
=
BertTokenizer
.
from_pretrained
(
args
.
output_dir
,
do_lower_case
=
args
.
do_lower_case
)
# Good practice: save your training arguments together with the trained model
output_args_file
=
os
.
path
.
join
(
args
.
output_dir
,
'training_args.bin'
)
torch
.
save
(
args
,
output_args_file
)
else
:
else
:
model
=
BertForQuestionAnswering
.
from_pretrained
(
args
.
bert_model
)
model
=
BertForQuestionAnswering
.
from_pretrained
(
args
.
bert_model
)
...
...
pytorch_pretrained_bert/modeling.py
View file @
68ab9599
...
@@ -46,8 +46,7 @@ PRETRAINED_MODEL_ARCHIVE_MAP = {
...
@@ -46,8 +46,7 @@ PRETRAINED_MODEL_ARCHIVE_MAP = {
'bert-large-cased-whole-word-masking'
:
"https://s3.amazonaws.com/models.huggingface.co/bert/bert-large-cased-whole-word-masking-pytorch_model.bin"
,
'bert-large-cased-whole-word-masking'
:
"https://s3.amazonaws.com/models.huggingface.co/bert/bert-large-cased-whole-word-masking-pytorch_model.bin"
,
'bert-large-uncased-whole-word-masking-finetuned-squad'
:
"https://s3.amazonaws.com/models.huggingface.co/bert/bert-large-uncased-whole-word-masking-finetuned-squad-pytorch_model.bin"
,
'bert-large-uncased-whole-word-masking-finetuned-squad'
:
"https://s3.amazonaws.com/models.huggingface.co/bert/bert-large-uncased-whole-word-masking-finetuned-squad-pytorch_model.bin"
,
'bert-large-cased-whole-word-masking-finetuned-squad'
:
"https://s3.amazonaws.com/models.huggingface.co/bert/bert-large-cased-whole-word-masking-finetuned-squad-pytorch_model.bin"
,
'bert-large-cased-whole-word-masking-finetuned-squad'
:
"https://s3.amazonaws.com/models.huggingface.co/bert/bert-large-cased-whole-word-masking-finetuned-squad-pytorch_model.bin"
,
'bert-base-uncased-finetuned-mrpc'
:
"https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-uncased-finetuned-mrpc-pytorch_model.bin"
,
'bert-base-cased-finetuned-mrpc'
:
"https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-cased-finetuned-mrpc-pytorch_model.bin"
,
'bert-large-uncased-whole-word-masking-finetuned-mrpc'
:
"https://s3.amazonaws.com/models.huggingface.co/bert/bert-large-uncased-whole-word-masking-finetuned-mrpc-pytorch_model.bin"
,
}
}
PRETRAINED_CONFIG_ARCHIVE_MAP
=
{
PRETRAINED_CONFIG_ARCHIVE_MAP
=
{
'bert-base-uncased'
:
"https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-uncased-config.json"
,
'bert-base-uncased'
:
"https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-uncased-config.json"
,
...
@@ -60,6 +59,9 @@ PRETRAINED_CONFIG_ARCHIVE_MAP = {
...
@@ -60,6 +59,9 @@ PRETRAINED_CONFIG_ARCHIVE_MAP = {
'bert-base-german-cased'
:
"https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-german-cased-config.json"
,
'bert-base-german-cased'
:
"https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-german-cased-config.json"
,
'bert-large-uncased-whole-word-masking'
:
"https://s3.amazonaws.com/models.huggingface.co/bert/bert-large-uncased-whole-word-masking-config.json"
,
'bert-large-uncased-whole-word-masking'
:
"https://s3.amazonaws.com/models.huggingface.co/bert/bert-large-uncased-whole-word-masking-config.json"
,
'bert-large-cased-whole-word-masking'
:
"https://s3.amazonaws.com/models.huggingface.co/bert/bert-large-cased-whole-word-masking-config.json"
,
'bert-large-cased-whole-word-masking'
:
"https://s3.amazonaws.com/models.huggingface.co/bert/bert-large-cased-whole-word-masking-config.json"
,
'bert-large-uncased-whole-word-masking-finetuned-squad'
:
"https://s3.amazonaws.com/models.huggingface.co/bert/bert-large-uncased-whole-word-masking-finetuned-squad-config.json"
,
'bert-large-cased-whole-word-masking-finetuned-squad'
:
"https://s3.amazonaws.com/models.huggingface.co/bert/bert-large-cased-whole-word-masking-finetuned-squad-config.json"
,
'bert-base-cased-finetuned-mrpc'
:
"https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-cased-finetuned-mrpc-config.json"
,
}
}
BERT_CONFIG_NAME
=
'bert_config.json'
BERT_CONFIG_NAME
=
'bert_config.json'
TF_WEIGHTS_NAME
=
'model.ckpt'
TF_WEIGHTS_NAME
=
'model.ckpt'
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment