chenpangpang / transformers · Commits

Commit 4c721c6b, authored Mar 15, 2019 by Ananya Harsh Jha

    added eval time metrics for GLUE tasks

Parent: 043c8781
Showing 1 changed file with 123 additions and 36 deletions.

examples/run_classifier.py (+123, −36)
@@ -31,6 +31,10 @@ from torch.utils.data import (DataLoader, RandomSampler, SequentialSampler,
 from torch.utils.data.distributed import DistributedSampler
 from tqdm import tqdm, trange

+from torch.nn import CrossEntropyLoss, MSELoss
+from scipy.stats import pearsonr, spearmanr
+from sklearn.metrics import matthews_corrcoef, f1_score
+
 from pytorch_pretrained_bert.file_utils import PYTORCH_PRETRAINED_BERT_CACHE
 from pytorch_pretrained_bert.modeling import BertForSequenceClassification, BertConfig, WEIGHTS_NAME, CONFIG_NAME
 from pytorch_pretrained_bert.tokenization import BertTokenizer
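The three new imports pull in the loss and metric primitives the rest of the diff builds on. A minimal sanity check of the scipy/sklearn metrics on toy data (the arrays are illustrative, not from the commit):

    import numpy as np
    from scipy.stats import pearsonr, spearmanr
    from sklearn.metrics import matthews_corrcoef, f1_score

    preds = np.array([1, 0, 1, 1])
    labels = np.array([1, 0, 0, 1])

    print(matthews_corrcoef(labels, preds))       # CoLA-style metric
    print(f1_score(y_true=labels, y_pred=preds))  # MRPC/QQP-style metric
    print(pearsonr(preds, labels)[0], spearmanr(preds, labels)[0])  # STS-B-style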
@@ -401,13 +405,17 @@ class WnliProcessor(DataProcessor):
         return examples


-def convert_examples_to_features(examples, label_list, max_seq_length, tokenizer):
+def convert_examples_to_features(examples, label_list, max_seq_length,
+                                 tokenizer, output_mode):
     """Loads a data file into a list of `InputBatch`s."""

     label_map = {label : i for i, label in enumerate(label_list)}

     features = []
     for (ex_index, example) in enumerate(examples):
+        if ex_index % 10000 == 0:
+            logger.info("Writing example %d of %d" % (ex_index, len(examples)))
+
         tokens_a = tokenizer.tokenize(example.text_a)

         tokens_b = None
@@ -463,7 +471,13 @@ def convert_examples_to_features(examples, label_list, max_seq_length, tokenizer
         assert len(input_mask) == max_seq_length
         assert len(segment_ids) == max_seq_length

-        label_id = label_map[example.label]
+        if output_mode == "classification":
+            label_id = label_map[example.label]
+        elif output_mode == "regression":
+            label_id = float(example.label)
+        else:
+            raise KeyError(output_mode)
+
         if ex_index < 5:
             logger.info("*** Example ***")
             logger.info("guid: %s" % (example.guid))
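The new output_mode argument decides how a raw label string is encoded: classification tasks still map it to an integer class index, while the regression task (STS-B) parses it as a float. A standalone sketch of the branch (label_map and the example values are illustrative):

    label_map = {"0": 0, "1": 1}          # built from label_list as in the diff

    def encode_label(label, output_mode):
        if output_mode == "classification":
            return label_map[label]       # int class index
        elif output_mode == "regression":
            return float(label)           # e.g. STS-B score "3.8" -> 3.8
        raise KeyError(output_mode)

    print(encode_label("1", "classification"))  # 1
    print(encode_label("3.8", "regression"))    # 3.8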
@@ -499,9 +513,56 @@ def _truncate_seq_pair(tokens_a, tokens_b, max_length):
         else:
             tokens_b.pop()

-def accuracy(out, labels):
-    outputs = np.argmax(out, axis=1)
-    return np.sum(outputs == labels)
+def simple_accuracy(preds, labels):
+    return (preds == labels).mean()
+
+
+def acc_and_f1(preds, labels):
+    acc = simple_accuracy(preds, labels)
+    f1 = f1_score(y_true=labels, y_pred=preds)
+    return {
+        "acc": acc,
+        "f1": f1,
+        "acc_and_f1": (acc + f1) / 2,
+    }
+
+
+def pearson_and_spearman(preds, labels):
+    pearson_corr = pearsonr(preds, labels)[0]
+    spearman_corr = spearmanr(preds, labels)[0]
+    return {
+        "pearson": pearson_corr,
+        "spearmanr": spearman_corr,
+        "corr": (pearson_corr + spearman_corr) / 2,
+    }
+
+
+def compute_metrics(task_name, preds, labels):
+    assert len(preds) == len(labels)
+    if task_name == "cola":
+        return {"mcc": matthews_corrcoef(labels, preds)}
+    elif task_name == "sst-2":
+        return {"acc": simple_accuracy(preds, labels)}
+    elif task_name == "mrpc":
+        return acc_and_f1(preds, labels)
+    elif task_name == "sts-b":
+        return pearson_and_spearman(preds, labels)
+    elif task_name == "qqp":
+        return acc_and_f1(preds, labels)
+    elif task_name == "mnli":
+        return {"acc": simple_accuracy(preds, labels)}
+    elif task_name == "mnli-mm":
+        return {"acc": simple_accuracy(preds, labels)}
+    elif task_name == "qnli":
+        return {"acc": simple_accuracy(preds, labels)}
+    elif task_name == "rte":
+        return {"acc": simple_accuracy(preds, labels)}
+    elif task_name == "wnli":
+        return {"acc": simple_accuracy(preds, labels)}
+    else:
+        raise KeyError(task_name)
+

 def main():
     parser = argparse.ArgumentParser()
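Taken together, compute_metrics maps each GLUE task to its standard metric: Matthews correlation for CoLA, accuracy and/or F1 for the sentence-pair tasks, and Pearson/Spearman correlation for STS-B. A quick usage sketch with made-up predictions:

    import numpy as np

    preds = np.array([1, 1, 0, 1])
    labels = np.array([1, 0, 0, 1])
    print(compute_metrics("mrpc", preds, labels))
    # {'acc': 0.75, 'f1': 0.8, 'acc_and_f1': 0.775}

    sts_preds = np.array([0.1, 2.5, 4.8])
    sts_labels = np.array([0.0, 3.0, 5.0])
    print(compute_metrics("sts-b", sts_preds, sts_labels))
    # {'pearson': ..., 'spearmanr': ..., 'corr': ...}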
@@ -605,6 +666,7 @@ def main():
     processors = {
         "cola": ColaProcessor,
         "mnli": MnliProcessor,
+        "mnli-mm": MnliMismatchedProcessor,
         "mrpc": MrpcProcessor,
         "sst-2": Sst2Processor,
         "sts-b": StsbProcessor,
@@ -617,6 +679,7 @@ def main():
     output_modes = {
         "cola": "classification",
         "mnli": "classification",
+        "mnli-mm": "classification",
         "mrpc": "classification",
         "sst-2": "classification",
         "sts-b": "regression",
@@ -626,13 +689,6 @@ def main():
         "wnli": "classification",
     }

-    num_labels_task = {
-        "cola": 2,
-        "sst-2": 2,
-        "mnli": 3,
-        "mrpc": 2,
-    }
-
     if args.local_rank == -1 or args.no_cuda:
         device = torch.device("cuda" if torch.cuda.is_available() and not args.no_cuda else "cpu")
         n_gpu = torch.cuda.device_count()
@@ -671,8 +727,10 @@ def main():
         raise ValueError("Task not found: %s" % (task_name))

     processor = processors[task_name]()
-    num_labels = num_labels_task[task_name]
+    output_mode = output_modes[task_name]
+
     label_list = processor.get_labels()
+    num_labels = len(label_list)

     tokenizer = BertTokenizer.from_pretrained(args.bert_model, do_lower_case=args.do_lower_case)
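num_labels is now derived from the processor's label list rather than the removed hard-coded num_labels_task table, which is what lets the newly wired-up tasks (mnli-mm, sts-b, qqp, qnli, rte, wnli) work without extra bookkeeping. A sketch of the idea; the label lists below are illustrative, and the STS-B entry is an assumption about how a regression processor would expose a single placeholder label so the model emits one logit:

    # Illustrative label lists, not taken from the diff.
    label_lists = {
        "mnli": ["contradiction", "entailment", "neutral"],  # num_labels = 3
        "cola": ["0", "1"],                                  # num_labels = 2
        "sts-b": [None],  # assumption: one placeholder -> one regression logit
    }
    for task, label_list in label_lists.items():
        print(task, "num_labels =", len(label_list))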
@@ -689,7 +747,7 @@ def main():
     cache_dir = args.cache_dir if args.cache_dir else os.path.join(str(PYTORCH_PRETRAINED_BERT_CACHE), 'distributed_{}'.format(args.local_rank))
     model = BertForSequenceClassification.from_pretrained(args.bert_model,
              cache_dir=cache_dir,
-              num_labels = num_labels)
+              num_labels=num_labels)
     if args.fp16:
         model.half()
     model.to(device)
@@ -737,7 +795,7 @@ def main():
     tr_loss = 0
     if args.do_train:
         train_features = convert_examples_to_features(
-            train_examples, label_list, args.max_seq_length, tokenizer)
+            train_examples, label_list, args.max_seq_length, tokenizer, output_mode)
         logger.info("***** Running training *****")
         logger.info("  Num examples = %d", len(train_examples))
         logger.info("  Batch size = %d", args.train_batch_size)
@@ -745,7 +803,12 @@ def main():
         all_input_ids = torch.tensor([f.input_ids for f in train_features], dtype=torch.long)
         all_input_mask = torch.tensor([f.input_mask for f in train_features], dtype=torch.long)
         all_segment_ids = torch.tensor([f.segment_ids for f in train_features], dtype=torch.long)
-        all_label_ids = torch.tensor([f.label_id for f in train_features], dtype=torch.long)
+
+        if output_mode == "classification":
+            all_label_ids = torch.tensor([f.label_id for f in train_features], dtype=torch.long)
+        elif output_mode == "regression":
+            all_label_ids = torch.tensor([f.label_id for f in train_features], dtype=torch.float)
+
         train_data = TensorDataset(all_input_ids, all_input_mask, all_segment_ids, all_label_ids)
         if args.local_rank == -1:
             train_sampler = RandomSampler(train_data)
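The label tensor's dtype now follows output_mode, because CrossEntropyLoss expects integer class indices (torch.long) while MSELoss expects float targets. A minimal check (the values are illustrative):

    import torch

    cls_labels = torch.tensor([0, 2, 1], dtype=torch.long)    # classification targets
    reg_labels = torch.tensor([0.4, 3.8], dtype=torch.float)  # regression targets

    assert cls_labels.dtype == torch.long
    assert reg_labels.dtype == torch.float32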
@@ -760,7 +823,17 @@ def main():
             for step, batch in enumerate(tqdm(train_dataloader, desc="Iteration")):
                 batch = tuple(t.to(device) for t in batch)
                 input_ids, input_mask, segment_ids, label_ids = batch
-                loss = model(input_ids, segment_ids, input_mask, label_ids)
+
+                # define a new function to compute loss values for both output_modes
+                logits = model(input_ids, segment_ids, input_mask, labels=None)
+
+                if output_mode == "classification":
+                    loss_fct = CrossEntropyLoss()
+                    loss = loss_fct(logits.view(-1, num_labels), label_ids.view(-1))
+                elif output_mode == "regression":
+                    loss_fct = MSELoss()
+                    loss = loss_fct(logits.view(-1), label_ids.view(-1))
+
                 if n_gpu > 1:
                     loss = loss.mean() # mean() to average on multi-gpu.
                 if args.gradient_accumulation_steps > 1:
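Because the loss now depends on output_mode, the model is called without labels to get raw logits and the loss is computed outside the forward pass. A self-contained sketch of the two branches with random tensors (batch size and num_labels are illustrative):

    import torch
    from torch.nn import CrossEntropyLoss, MSELoss

    batch, num_labels = 8, 3
    logits = torch.randn(batch, num_labels)

    # classification: logits (batch, num_labels) vs. long class indices (batch,)
    label_ids = torch.randint(num_labels, (batch,))
    loss = CrossEntropyLoss()(logits.view(-1, num_labels), label_ids.view(-1))

    # regression: a single logit per example vs. float targets
    reg_logits = torch.randn(batch, 1)
    reg_labels = torch.rand(batch) * 5  # e.g. STS-B scores in [0, 5]
    reg_loss = MSELoss()(reg_logits.view(-1), reg_labels.view(-1))

    print(loss.item(), reg_loss.item())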
@@ -805,22 +878,28 @@ def main():
     if args.do_eval and (args.local_rank == -1 or torch.distributed.get_rank() == 0):
         eval_examples = processor.get_dev_examples(args.data_dir)
         eval_features = convert_examples_to_features(
-            eval_examples, label_list, args.max_seq_length, tokenizer)
+            eval_examples, label_list, args.max_seq_length, tokenizer, output_mode)
         logger.info("***** Running evaluation *****")
         logger.info("  Num examples = %d", len(eval_examples))
         logger.info("  Batch size = %d", args.eval_batch_size)
         all_input_ids = torch.tensor([f.input_ids for f in eval_features], dtype=torch.long)
         all_input_mask = torch.tensor([f.input_mask for f in eval_features], dtype=torch.long)
         all_segment_ids = torch.tensor([f.segment_ids for f in eval_features], dtype=torch.long)
-        all_label_ids = torch.tensor([f.label_id for f in eval_features], dtype=torch.long)
+
+        if output_mode == "classification":
+            all_label_ids = torch.tensor([f.label_id for f in eval_features], dtype=torch.long)
+        elif output_mode == "regression":
+            all_label_ids = torch.tensor([f.label_id for f in eval_features], dtype=torch.float)
+
         eval_data = TensorDataset(all_input_ids, all_input_mask, all_segment_ids, all_label_ids)
         # Run prediction for full data
         eval_sampler = SequentialSampler(eval_data)
         eval_dataloader = DataLoader(eval_data, sampler=eval_sampler, batch_size=args.eval_batch_size)

         model.eval()
-        eval_loss, eval_accuracy = 0, 0
-        nb_eval_steps, nb_eval_examples = 0, 0
+        eval_loss = 0
+        nb_eval_steps = 0
+        preds = []

         for input_ids, input_mask, segment_ids, label_ids in tqdm(eval_dataloader, desc="Evaluating"):
             input_ids = input_ids.to(device)
@@ -829,26 +908,34 @@ def main():
             label_ids = label_ids.to(device)

             with torch.no_grad():
-                tmp_eval_loss = model(input_ids, segment_ids, input_mask, label_ids)
-                logits = model(input_ids, segment_ids, input_mask, labels=None)
+                logits = model(input_ids, segment_ids, input_mask)

-            logits = logits.detach().cpu().numpy()
-            label_ids = label_ids.to('cpu').numpy()
-            tmp_eval_accuracy = accuracy(logits, label_ids)
+            # create eval loss and other metric required by the task
+            if output_mode == "classification":
+                loss_fct = CrossEntropyLoss()
+                tmp_eval_loss = loss_fct(logits.view(-1, num_labels), label_ids.view(-1))
+            elif output_mode == "regression":
+                loss_fct = MSELoss()
+                tmp_eval_loss = loss_fct(logits.view(-1), label_ids.view(-1))

             eval_loss += tmp_eval_loss.mean().item()
-            eval_accuracy += tmp_eval_accuracy
-
-            nb_eval_examples += input_ids.size(0)
             nb_eval_steps += 1
+            if len(preds) == 0:
+                preds.append(logits.detach().cpu().numpy())
+            else:
+                preds[0] = np.append(
+                    preds[0], logits.detach().cpu().numpy(), axis=0)

         eval_loss = eval_loss / nb_eval_steps
-        eval_accuracy = eval_accuracy / nb_eval_examples
+        preds = preds[0]
+        if output_mode == "classification":
+            preds = np.argmax(preds, axis=1)
+        result = compute_metrics(task_name, preds, all_label_ids.numpy())
         loss = tr_loss/nb_tr_steps if args.do_train else None
-        result = {'eval_loss': eval_loss,
-                  'eval_accuracy': eval_accuracy,
-                  'global_step': global_step,
-                  'loss': loss}
+
+        result['eval_loss'] = eval_loss
+        result['global_step'] = global_step
+        result['loss'] = loss

         output_eval_file = os.path.join(args.output_dir, "eval_results.txt")
         with open(output_eval_file, "w") as writer:
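At eval time the per-batch logits are concatenated into one array; for classification an argmax then turns them into class predictions before compute_metrics runs. A sketch of the accumulation pattern in isolation (the batch shapes are illustrative):

    import numpy as np

    preds = []
    for batch_logits in [np.random.randn(4, 3), np.random.randn(2, 3)]:  # two fake batches
        if len(preds) == 0:
            preds.append(batch_logits)
        else:
            preds[0] = np.append(preds[0], batch_logits, axis=0)

    preds = preds[0]                  # shape (6, 3): all logits, in dataset order
    preds = np.argmax(preds, axis=1)  # classification: class index per example
    print(preds.shape)                # (6,)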