chenpangpang / transformers · Commits · 632f2d2d
Unverified commit 632f2d2d, authored Dec 11, 2018 by Thomas Wolf; committed by GitHub on Dec 11, 2018.
Merge branch 'master' into fourth-release
Parents: b13abfa9, a3a3180c
Showing 3 changed files with 28 additions and 33 deletions:

README.md (+1, -1)
examples/run_classifier.py (+24, -30)
pytorch_pretrained_bert/optimization.py (+3, -2)
README.md

@@ -19,7 +19,7 @@ This implementation is provided with [Google's pre-trained models](https://githu
 ## Installation
-This repo was tested on Python 3.5+ and PyTorch 0.4.1
+This repo was tested on Python 3.6+ and PyTorch 0.4.1
 ### With pip
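As a quick, hedged sanity check against the updated requirement (the version bounds come from the README line above; the snippet itself is illustrative and not part of the repo):

```python
import sys

import torch

# Illustrative check mirroring the README requirement: Python 3.6+ and PyTorch 0.4.1.
assert sys.version_info >= (3, 6), "this repo was tested on Python 3.6+"
print("Python", ".".join(map(str, sys.version_info[:3])), "| PyTorch", torch.__version__)
```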
examples/run_classifier.py

@@ -196,9 +196,7 @@ class ColaProcessor(DataProcessor):
 def convert_examples_to_features(examples, label_list, max_seq_length, tokenizer):
     """Loads a data file into a list of `InputBatch`s."""

-    label_map = {}
-    for (i, label) in enumerate(label_list):
-        label_map[label] = i
+    label_map = {label : i for i, label in enumerate(label_list)}

     features = []
     for (ex_index, example) in enumerate(examples):
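The removed loop and the added dict comprehension build the same label-to-index mapping; a minimal standalone check (the label values are illustrative, e.g. CoLA's "0"/"1" labels):

```python
label_list = ["0", "1"]  # illustrative; in the script this comes from processor.get_labels()

# Old construction (removed above).
label_map_loop = {}
for (i, label) in enumerate(label_list):
    label_map_loop[label] = i

# New construction (added above).
label_map_comp = {label: i for i, label in enumerate(label_list)}

assert label_map_loop == label_map_comp == {"0": 0, "1": 1}
```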
@@ -207,8 +205,6 @@ def convert_examples_to_features(examples, label_list, max_seq_length, tokenizer
         tokens_b = None
         if example.text_b:
             tokens_b = tokenizer.tokenize(example.text_b)
-
-        if tokens_b:
             # Modifies `tokens_a` and `tokens_b` in place so that the total
             # length is less than the specified length.
             # Account for [CLS], [SEP], [SEP] with "- 3"

@@ -216,7 +212,7 @@ def convert_examples_to_features(examples, label_list, max_seq_length, tokenizer
         else:
             # Account for [CLS] and [SEP] with "- 2"
             if len(tokens_a) > max_seq_length - 2:
-                tokens_a = tokens_a[0:(max_seq_length - 2)]
+                tokens_a = tokens_a[:(max_seq_length - 2)]
         # The convention in BERT is:
         # (a) For sequence pairs:
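The "- 3" and "- 2" budgets in the comments above account for the special tokens added later: a sentence pair gets [CLS], [SEP], [SEP], while a single sentence gets [CLS] and [SEP]. A short sketch of that accounting (toy values; the in-place pair-truncation helper that follows these comments is elided from the hunk):

```python
max_seq_length = 8

# Single sentence: [CLS] tokens_a [SEP] uses 2 special-token slots,
# so tokens_a may keep at most max_seq_length - 2 tokens (the slice above).
tokens_a = ["the", "cat", "sat", "on", "the", "mat", "today"]
if len(tokens_a) > max_seq_length - 2:
    tokens_a = tokens_a[:(max_seq_length - 2)]
assert len(tokens_a) == 6

# Sentence pair: [CLS] tokens_a [SEP] tokens_b [SEP] uses 3 special-token slots,
# so tokens_a and tokens_b together may keep at most max_seq_length - 3 tokens.
pair_budget = max_seq_length - 3  # 5 shared slots for the two sentences
```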
@@ -236,22 +232,12 @@ def convert_examples_to_features(examples, label_list, max_seq_length, tokenizer
         # For classification tasks, the first vector (corresponding to [CLS]) is
         # used as as the "sentence vector". Note that this only makes sense because
         # the entire model is fine-tuned.
-        tokens = []
-        segment_ids = []
-        tokens.append("[CLS]")
-        segment_ids.append(0)
-        for token in tokens_a:
-            tokens.append(token)
-            segment_ids.append(0)
-        tokens.append("[SEP]")
-        segment_ids.append(0)
+        tokens = ["[CLS]"] + tokens_a + ["[SEP]"]
+        segment_ids = [0] * len(tokens)

         if tokens_b:
-            for token in tokens_b:
-                tokens.append(token)
-                segment_ids.append(1)
-            tokens.append("[SEP]")
-            segment_ids.append(1)
+            tokens += tokens_b + ["[SEP]"]
+            segment_ids += [1] * (len(tokens_b) + 1)

         input_ids = tokenizer.convert_tokens_to_ids(tokens)
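A short worked example of what the rewritten construction produces for a sentence pair (the token strings are illustrative; the logic is copied from the added lines above and behaves exactly like the removed append loops):

```python
tokens_a = ["how", "are", "you"]
tokens_b = ["fine", "thanks"]

tokens = ["[CLS]"] + tokens_a + ["[SEP]"]
segment_ids = [0] * len(tokens)
if tokens_b:
    tokens += tokens_b + ["[SEP]"]
    segment_ids += [1] * (len(tokens_b) + 1)

assert tokens == ["[CLS]", "how", "are", "you", "[SEP]", "fine", "thanks", "[SEP]"]
assert segment_ids == [0, 0, 0, 0, 0, 1, 1, 1]  # segment 0 = first sentence, 1 = second
```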
@@ -260,10 +246,10 @@ def convert_examples_to_features(examples, label_list, max_seq_length, tokenizer
         input_mask = [1] * len(input_ids)

         # Zero-pad up to the sequence length.
-        while len(input_ids) < max_seq_length:
-            input_ids.append(0)
-            input_mask.append(0)
-            segment_ids.append(0)
+        padding = [0] * (max_seq_length - len(input_ids))
+        input_ids += padding
+        input_mask += padding
+        segment_ids += padding

         assert len(input_ids) == max_seq_length
         assert len(input_mask) == max_seq_length
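The removed while-loop and the added list arithmetic pad to the same result; a minimal sketch continuing the toy example (the id values are illustrative):

```python
max_seq_length = 8
input_ids = [101, 2129, 2024, 2017, 102]   # 5 illustrative token ids
input_mask = [1] * len(input_ids)
segment_ids = [0] * len(input_ids)

# New padding from the hunk above: build one shared zero list and extend all three.
padding = [0] * (max_seq_length - len(input_ids))
input_ids += padding
input_mask += padding
segment_ids += padding

assert len(input_ids) == len(input_mask) == len(segment_ids) == max_seq_length
assert input_mask == [1, 1, 1, 1, 1, 0, 0, 0]
```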
@@ -437,6 +423,12 @@ def main():
         "mrpc": MrpcProcessor,
     }

+    num_labels_task = {
+        "cola": 2,
+        "mnli": 3,
+        "mrpc": 2,
+    }
+
     if args.local_rank == -1 or args.no_cuda:
         device = torch.device("cuda" if torch.cuda.is_available() and not args.no_cuda else "cpu")
         n_gpu = torch.cuda.device_count()
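The added `num_labels_task` dict records how many output classes each supported GLUE task has (CoLA and MRPC are binary, MNLI has three classes); a small sketch of the lookup the next hunk adds (the `task_name` value is illustrative, the script takes it from `args.task_name`):

```python
num_labels_task = {
    "cola": 2,   # acceptable / unacceptable
    "mnli": 3,   # entailment / neutral / contradiction
    "mrpc": 2,   # paraphrase / not a paraphrase
}

task_name = "mnli"  # illustrative; in main() this comes from args.task_name
num_labels = num_labels_task[task_name]
assert num_labels == 3
```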
@@ -475,6 +467,7 @@ def main():
         raise ValueError("Task not found: %s" % (task_name))

     processor = processors[task_name]()
+    num_labels = num_labels_task[task_name]
     label_list = processor.get_labels()

     tokenizer = BertTokenizer.from_pretrained(args.bert_model, do_lower_case=args.do_lower_case)
@@ -487,8 +480,9 @@ def main():
             len(train_examples) / args.train_batch_size / args.gradient_accumulation_steps * args.num_train_epochs)

     # Prepare model
-    cache_dir = PYTORCH_PRETRAINED_BERT_CACHE / 'distributed_{}'.format(args.local_rank) # for distributed learning
     model = BertForSequenceClassification.from_pretrained(args.bert_model,
-              cache_dir = cache_dir)
+              cache_dir = PYTORCH_PRETRAINED_BERT_CACHE / 'distributed_{}'.format(args.local_rank),
+              num_labels = num_labels)
     if args.fp16:
         model.half()
     model.to(device)
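Putting the added pieces together, the model is now constructed with a task-sized classification head and a per-process cache directory; a hedged sketch of the same call outside the script (the model shortcut and `local_rank` value are illustrative, and the import paths are assumed to match this release of the package):

```python
from pytorch_pretrained_bert import BertForSequenceClassification
from pytorch_pretrained_bert.file_utils import PYTORCH_PRETRAINED_BERT_CACHE

local_rank = -1   # illustrative; the script uses args.local_rank
num_labels = 2    # e.g. num_labels_task["mrpc"]

# cache_dir keeps per-process download caches apart in distributed runs;
# num_labels sizes the classifier head stacked on top of BERT.
model = BertForSequenceClassification.from_pretrained(
    "bert-base-uncased",
    cache_dir=PYTORCH_PRETRAINED_BERT_CACHE / 'distributed_{}'.format(local_rank),
    num_labels=num_labels)
```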
pytorch_pretrained_bert/optimization.py

@@ -17,6 +17,7 @@
 import math
 import torch
 from torch.optim import Optimizer
+from torch.optim.optimizer import required
 from torch.nn.utils import clip_grad_norm_

 def warmup_cosine(x, warmup=0.002):
@@ -55,10 +56,10 @@ class BertAdam(Optimizer):
         weight_decay_rate: Weight decay. Default: 0.01
         max_grad_norm: Maximum norm for the gradients (-1 means no clipping). Default: 1.0
     """
-    def __init__(self, params, lr, warmup=-1, t_total=-1, schedule='warmup_linear',
+    def __init__(self, params, lr=required, warmup=-1, t_total=-1, schedule='warmup_linear',
                  b1=0.9, b2=0.999, e=1e-6, weight_decay_rate=0.01,
                  max_grad_norm=1.0):
-        if not lr >= 0.0:
+        if lr is not required and lr < 0.0:
             raise ValueError("Invalid learning rate: {} - should be >= 0.0".format(lr))
         if schedule not in SCHEDULES:
             raise ValueError("Invalid schedule parameter: {}".format(schedule))
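The signature change from a positional `lr` to `lr=required` follows the sentinel pattern used by PyTorch's own optimizers: `required` is a module-level marker object, so `lr` stays a keyword argument with no usable default, the range check only fires when a real number is passed, and the base `Optimizer` class complains if `lr` is never supplied. A minimal sketch of the pattern (a standalone toy class, not the library's `BertAdam`):

```python
import torch
from torch.optim import Optimizer
from torch.optim.optimizer import required  # sentinel meaning "caller must provide this"


class TinyOptimizer(Optimizer):
    """Toy optimizer skeleton showing the `required` sentinel used by BertAdam."""

    def __init__(self, params, lr=required):
        # Validate only when an actual value was passed, exactly like the new check above.
        if lr is not required and lr < 0.0:
            raise ValueError("Invalid learning rate: {} - should be >= 0.0".format(lr))
        super(TinyOptimizer, self).__init__(params, dict(lr=lr))


params = [torch.nn.Parameter(torch.zeros(2))]
TinyOptimizer(params, lr=0.1)   # fine
# TinyOptimizer(params)         # base Optimizer raises: lr is a required optimization parameter
```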