Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
chenpangpang
transformers
Commits
99d45155
Commit
99d45155
authored
Dec 20, 2019
by
Nafise Sadat Moosavi
Committed by
thomwolf
Jan 16, 2020
Browse files
HANS evaluation
parent
dc17f2a1
Changes
4
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
877 additions
and
0 deletions
+877
-0
examples/hans_processors.py
examples/hans_processors.py
+210
-0
examples/test_hans.py
examples/test_hans.py
+543
-0
examples/utils_hans.py
examples/utils_hans.py
+122
-0
src/transformers/data/metrics/__init__.py
src/transformers/data/metrics/__init__.py
+2
-0
No files found.
examples/hans_processors.py
0 → 100644
View file @
99d45155
# coding=utf-8
# Copyright 2018 The Google AI Language Team Authors and The HuggingFace Inc. team.
# Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
""" GLUE processors and helpers """
import
logging
import
os
from
utils_hans
import
DataProcessor
,
InputExample
,
InputFeatures
from
transformers.file_utils
import
is_tf_available
if
is_tf_available
():
import
tensorflow
as
tf
logger
=
logging
.
getLogger
(
__name__
)
def
hans_convert_examples_to_features
(
examples
,
tokenizer
,
max_length
=
512
,
task
=
None
,
label_list
=
None
,
output_mode
=
None
,
pad_on_left
=
False
,
pad_token
=
0
,
pad_token_segment_id
=
0
,
mask_padding_with_zero
=
True
):
"""
Loads a data file into a list of ``InputFeatures``
Args:
examples: List of ``InputExamples`` or ``tf.data.Dataset`` containing the examples.
tokenizer: Instance of a tokenizer that will tokenize the examples
max_length: Maximum example length
task: HANS
label_list: List of labels. Can be obtained from the processor using the ``processor.get_labels()`` method
output_mode: String indicating the output mode. Either ``regression`` or ``classification``
pad_on_left: If set to ``True``, the examples will be padded on the left rather than on the right (default)
pad_token: Padding token
pad_token_segment_id: The segment ID for the padding token (It is usually 0, but can vary such as for XLNet where it is 4)
mask_padding_with_zero: If set to ``True``, the attention mask will be filled by ``1`` for actual values
and by ``0`` for padded values. If set to ``False``, inverts it (``1`` for padded values, ``0`` for
actual values)
Returns:
If the ``examples`` input is a ``tf.data.Dataset``, will return a ``tf.data.Dataset``
containing the task-specific features. If the input is a list of ``InputExamples``, will return
a list of task-specific ``InputFeatures`` which can be fed to the model.
"""
is_tf_dataset
=
False
if
is_tf_available
()
and
isinstance
(
examples
,
tf
.
data
.
Dataset
):
is_tf_dataset
=
True
if
task
is
not
None
:
processor
=
glue_processors
[
task
]()
if
label_list
is
None
:
label_list
=
processor
.
get_labels
()
logger
.
info
(
"Using label list %s for task %s"
%
(
label_list
,
task
))
if
output_mode
is
None
:
output_mode
=
glue_output_modes
[
task
]
logger
.
info
(
"Using output mode %s for task %s"
%
(
output_mode
,
task
))
label_map
=
{
label
:
i
for
i
,
label
in
enumerate
(
label_list
)}
features
=
[]
for
(
ex_index
,
example
)
in
enumerate
(
examples
):
if
ex_index
%
10000
==
0
:
logger
.
info
(
"Writing example %d"
%
(
ex_index
))
if
is_tf_dataset
:
example
=
processor
.
get_example_from_tensor_dict
(
example
)
example
=
processor
.
tfds_map
(
example
)
inputs
=
tokenizer
.
encode_plus
(
example
.
text_a
,
example
.
text_b
,
add_special_tokens
=
True
,
max_length
=
max_length
,
)
input_ids
,
token_type_ids
=
inputs
[
"input_ids"
],
inputs
[
"token_type_ids"
]
# The mask has 1 for real tokens and 0 for padding tokens. Only real
# tokens are attended to.
attention_mask
=
[
1
if
mask_padding_with_zero
else
0
]
*
len
(
input_ids
)
# Zero-pad up to the sequence length.
padding_length
=
max_length
-
len
(
input_ids
)
if
pad_on_left
:
input_ids
=
([
pad_token
]
*
padding_length
)
+
input_ids
attention_mask
=
([
0
if
mask_padding_with_zero
else
1
]
*
padding_length
)
+
attention_mask
token_type_ids
=
([
pad_token_segment_id
]
*
padding_length
)
+
token_type_ids
else
:
input_ids
=
input_ids
+
([
pad_token
]
*
padding_length
)
attention_mask
=
attention_mask
+
([
0
if
mask_padding_with_zero
else
1
]
*
padding_length
)
token_type_ids
=
token_type_ids
+
([
pad_token_segment_id
]
*
padding_length
)
assert
len
(
input_ids
)
==
max_length
,
"Error with input length {} vs {}"
.
format
(
len
(
input_ids
),
max_length
)
assert
len
(
attention_mask
)
==
max_length
,
"Error with input length {} vs {}"
.
format
(
len
(
attention_mask
),
max_length
)
assert
len
(
token_type_ids
)
==
max_length
,
"Error with input length {} vs {}"
.
format
(
len
(
token_type_ids
),
max_length
)
if
output_mode
==
"classification"
:
label
=
label_map
[
example
.
label
]
if
example
.
label
in
label_map
else
0
elif
output_mode
==
"regression"
:
label
=
float
(
example
.
label
)
else
:
raise
KeyError
(
output_mode
)
pairID
=
str
(
example
.
pairID
)
if
ex_index
<
10
:
logger
.
info
(
"*** Example ***"
)
logger
.
info
(
"text_a: %s"
%
(
example
.
text_a
))
logger
.
info
(
"text_b: %s"
%
(
example
.
text_b
))
logger
.
info
(
"guid: %s"
%
(
example
.
guid
))
logger
.
info
(
"input_ids: %s"
%
" "
.
join
([
str
(
x
)
for
x
in
input_ids
]))
logger
.
info
(
"attention_mask: %s"
%
" "
.
join
([
str
(
x
)
for
x
in
attention_mask
]))
logger
.
info
(
"token_type_ids: %s"
%
" "
.
join
([
str
(
x
)
for
x
in
token_type_ids
]))
logger
.
info
(
"label: %s (id = %d)"
%
(
example
.
label
,
label
))
features
.
append
(
InputFeatures
(
input_ids
=
input_ids
,
attention_mask
=
attention_mask
,
token_type_ids
=
token_type_ids
,
label
=
label
,
pairID
=
pairID
))
if
is_tf_available
()
and
is_tf_dataset
:
def
gen
():
for
ex
in
features
:
yield
({
'input_ids'
:
ex
.
input_ids
,
'attention_mask'
:
ex
.
attention_mask
,
'token_type_ids'
:
ex
.
token_type_ids
},
ex
.
label
)
return
tf
.
data
.
Dataset
.
from_generator
(
gen
,
({
'input_ids'
:
tf
.
int32
,
'attention_mask'
:
tf
.
int32
,
'token_type_ids'
:
tf
.
int32
},
tf
.
int64
),
({
'input_ids'
:
tf
.
TensorShape
([
None
]),
'attention_mask'
:
tf
.
TensorShape
([
None
]),
'token_type_ids'
:
tf
.
TensorShape
([
None
])},
tf
.
TensorShape
([])))
return
features
class
HansProcessor
(
DataProcessor
):
"""Processor for the HANS data set."""
def
get_example_from_tensor_dict
(
self
,
tensor_dict
):
"""See base class."""
return
InputExample
(
tensor_dict
[
'idx'
].
numpy
(),
tensor_dict
[
'premise'
].
numpy
().
decode
(
'utf-8'
),
tensor_dict
[
'hypothesis'
].
numpy
().
decode
(
'utf-8'
),
str
(
tensor_dict
[
'label'
].
numpy
()))
def
get_train_examples
(
self
,
data_dir
):
"""See base class."""
return
self
.
_create_examples
(
self
.
_read_tsv
(
os
.
path
.
join
(
data_dir
,
"heuristics_train_set.txt"
)),
"train"
)
def
get_dev_examples
(
self
,
data_dir
):
"""See base class."""
return
self
.
_create_examples
(
self
.
_read_tsv
(
os
.
path
.
join
(
data_dir
,
"heuristics_evaluation_set.txt"
)),
"dev"
)
def
get_labels
(
self
):
"""See base class."""
return
[
"contradiction"
,
"entailment"
,
"neutral"
]
def
_create_examples
(
self
,
lines
,
set_type
):
"""Creates examples for the training and dev sets."""
examples
=
[]
for
(
i
,
line
)
in
enumerate
(
lines
):
if
i
==
0
:
continue
guid
=
"%s-%s"
%
(
set_type
,
line
[
0
])
text_a
=
line
[
5
]
text_b
=
line
[
6
]
pairID
=
line
[
7
][
2
:]
if
line
[
7
].
startswith
(
'ex'
)
else
line
[
7
]
label
=
line
[
-
1
]
examples
.
append
(
InputExample
(
guid
=
guid
,
text_a
=
text_a
,
text_b
=
text_b
,
label
=
label
,
pairID
=
pairID
))
return
examples
glue_tasks_num_labels
=
{
"hans"
:
3
,
}
glue_processors
=
{
"hans"
:
HansProcessor
,
}
glue_output_modes
=
{
"hans"
:
"classification"
,
}
examples/test_hans.py
0 → 100644
View file @
99d45155
# coding=utf-8
# Copyright 2018 The Google AI Language Team Authors and The HuggingFace Inc. team.
# Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
""" Finetuning the library models for sequence classification on GLUE (Bert, XLM, XLNet, RoBERTa)."""
from
__future__
import
absolute_import
,
division
,
print_function
import
argparse
import
glob
import
logging
import
os
import
random
import
json
import
numpy
as
np
import
torch
from
torch.utils.data
import
(
DataLoader
,
RandomSampler
,
SequentialSampler
,
TensorDataset
)
from
torch.utils.data.distributed
import
DistributedSampler
try
:
from
torch.utils.tensorboard
import
SummaryWriter
except
:
from
tensorboardX
import
SummaryWriter
from
tqdm
import
tqdm
,
trange
from
transformers
import
(
WEIGHTS_NAME
,
BertConfig
,
BertForSequenceClassification
,
BertTokenizer
,
RobertaConfig
,
RobertaForSequenceClassification
,
RobertaTokenizer
,
XLMConfig
,
XLMForSequenceClassification
,
XLMTokenizer
,
XLNetConfig
,
XLNetForSequenceClassification
,
XLNetTokenizer
,
DistilBertConfig
,
DistilBertForSequenceClassification
,
DistilBertTokenizer
,
AlbertConfig
,
AlbertForSequenceClassification
,
AlbertTokenizer
,
)
from
transformers
import
AdamW
,
get_linear_schedule_with_warmup
from
transformers
import
glue_compute_metrics
as
compute_metrics
from
hans_processors
import
glue_output_modes
as
output_modes
from
hans_processors
import
glue_processors
as
processors
from
hans_processors
import
hans_convert_examples_to_features
as
convert_examples_to_features
logger
=
logging
.
getLogger
(
__name__
)
ALL_MODELS
=
sum
((
tuple
(
conf
.
pretrained_config_archive_map
.
keys
())
for
conf
in
(
BertConfig
,
XLNetConfig
,
XLMConfig
,
RobertaConfig
,
DistilBertConfig
)),
())
MODEL_CLASSES
=
{
'bert'
:
(
BertConfig
,
BertForSequenceClassification
,
BertTokenizer
),
'xlnet'
:
(
XLNetConfig
,
XLNetForSequenceClassification
,
XLNetTokenizer
),
'xlm'
:
(
XLMConfig
,
XLMForSequenceClassification
,
XLMTokenizer
),
'roberta'
:
(
RobertaConfig
,
RobertaForSequenceClassification
,
RobertaTokenizer
),
'distilbert'
:
(
DistilBertConfig
,
DistilBertForSequenceClassification
,
DistilBertTokenizer
),
'albert'
:
(
AlbertConfig
,
AlbertForSequenceClassification
,
AlbertTokenizer
)
}
def
set_seed
(
args
):
random
.
seed
(
args
.
seed
)
np
.
random
.
seed
(
args
.
seed
)
torch
.
manual_seed
(
args
.
seed
)
if
args
.
n_gpu
>
0
:
torch
.
cuda
.
manual_seed_all
(
args
.
seed
)
def
train
(
args
,
train_dataset
,
model
,
tokenizer
):
""" Train the model """
if
args
.
local_rank
in
[
-
1
,
0
]:
tb_writer
=
SummaryWriter
()
args
.
train_batch_size
=
args
.
per_gpu_train_batch_size
*
max
(
1
,
args
.
n_gpu
)
train_sampler
=
RandomSampler
(
train_dataset
)
if
args
.
local_rank
==
-
1
else
DistributedSampler
(
train_dataset
)
train_dataloader
=
DataLoader
(
train_dataset
,
sampler
=
train_sampler
,
batch_size
=
args
.
train_batch_size
)
if
args
.
max_steps
>
0
:
t_total
=
args
.
max_steps
args
.
num_train_epochs
=
args
.
max_steps
//
(
len
(
train_dataloader
)
//
args
.
gradient_accumulation_steps
)
+
1
else
:
t_total
=
len
(
train_dataloader
)
//
args
.
gradient_accumulation_steps
*
args
.
num_train_epochs
# Prepare optimizer and schedule (linear warmup and decay)
no_decay
=
[
'bias'
,
'LayerNorm.weight'
]
optimizer_grouped_parameters
=
[
{
'params'
:
[
p
for
n
,
p
in
model
.
named_parameters
()
if
not
any
(
nd
in
n
for
nd
in
no_decay
)],
'weight_decay'
:
args
.
weight_decay
},
{
'params'
:
[
p
for
n
,
p
in
model
.
named_parameters
()
if
any
(
nd
in
n
for
nd
in
no_decay
)],
'weight_decay'
:
0.0
}
]
optimizer
=
AdamW
(
optimizer_grouped_parameters
,
lr
=
args
.
learning_rate
,
eps
=
args
.
adam_epsilon
)
scheduler
=
get_linear_schedule_with_warmup
(
optimizer
,
num_warmup_steps
=
args
.
warmup_steps
,
num_training_steps
=
t_total
)
if
args
.
fp16
:
try
:
from
apex
import
amp
except
ImportError
:
raise
ImportError
(
"Please install apex from https://www.github.com/nvidia/apex to use fp16 training."
)
model
,
optimizer
=
amp
.
initialize
(
model
,
optimizer
,
opt_level
=
args
.
fp16_opt_level
)
# multi-gpu training (should be after apex fp16 initialization)
if
args
.
n_gpu
>
1
:
model
=
torch
.
nn
.
DataParallel
(
model
)
# Distributed training (should be after apex fp16 initialization)
if
args
.
local_rank
!=
-
1
:
model
=
torch
.
nn
.
parallel
.
DistributedDataParallel
(
model
,
device_ids
=
[
args
.
local_rank
],
output_device
=
args
.
local_rank
,
find_unused_parameters
=
True
)
# Train!
logger
.
info
(
"***** Running training *****"
)
logger
.
info
(
" Num examples = %d"
,
len
(
train_dataset
))
logger
.
info
(
" Num Epochs = %d"
,
args
.
num_train_epochs
)
logger
.
info
(
" Instantaneous batch size per GPU = %d"
,
args
.
per_gpu_train_batch_size
)
logger
.
info
(
" Total train batch size (w. parallel, distributed & accumulation) = %d"
,
args
.
train_batch_size
*
args
.
gradient_accumulation_steps
*
(
torch
.
distributed
.
get_world_size
()
if
args
.
local_rank
!=
-
1
else
1
))
logger
.
info
(
" Gradient Accumulation steps = %d"
,
args
.
gradient_accumulation_steps
)
logger
.
info
(
" Total optimization steps = %d"
,
t_total
)
global_step
=
0
tr_loss
,
logging_loss
=
0.0
,
0.0
model
.
zero_grad
()
train_iterator
=
trange
(
int
(
args
.
num_train_epochs
),
desc
=
"Epoch"
,
disable
=
args
.
local_rank
not
in
[
-
1
,
0
])
set_seed
(
args
)
# Added here for reproductibility (even between python 2 and 3)
for
_
in
train_iterator
:
epoch_iterator
=
tqdm
(
train_dataloader
,
desc
=
"Iteration"
,
disable
=
args
.
local_rank
not
in
[
-
1
,
0
])
for
step
,
batch
in
enumerate
(
epoch_iterator
):
model
.
train
()
batch
=
tuple
(
t
.
to
(
args
.
device
)
for
t
in
batch
)
inputs
=
{
'input_ids'
:
batch
[
0
],
'attention_mask'
:
batch
[
1
],
'labels'
:
batch
[
3
]}
if
args
.
model_type
!=
'distilbert'
:
inputs
[
'token_type_ids'
]
=
batch
[
2
]
if
args
.
model_type
in
[
'bert'
,
'xlnet'
]
else
None
# XLM, DistilBERT and RoBERTa don't use segment_ids
outputs
=
model
(
**
inputs
)
loss
=
outputs
[
0
]
# model outputs are always tuple in transformers (see doc)
if
args
.
n_gpu
>
1
:
loss
=
loss
.
mean
()
# mean() to average on multi-gpu parallel training
if
args
.
gradient_accumulation_steps
>
1
:
loss
=
loss
/
args
.
gradient_accumulation_steps
if
args
.
fp16
:
with
amp
.
scale_loss
(
loss
,
optimizer
)
as
scaled_loss
:
scaled_loss
.
backward
()
else
:
loss
.
backward
()
tr_loss
+=
loss
.
item
()
if
(
step
+
1
)
%
args
.
gradient_accumulation_steps
==
0
:
if
args
.
fp16
:
torch
.
nn
.
utils
.
clip_grad_norm_
(
amp
.
master_params
(
optimizer
),
args
.
max_grad_norm
)
else
:
torch
.
nn
.
utils
.
clip_grad_norm_
(
model
.
parameters
(),
args
.
max_grad_norm
)
optimizer
.
step
()
scheduler
.
step
()
# Update learning rate schedule
model
.
zero_grad
()
global_step
+=
1
if
args
.
local_rank
in
[
-
1
,
0
]
and
args
.
logging_steps
>
0
and
global_step
%
args
.
logging_steps
==
0
:
logs
=
{}
if
args
.
local_rank
==
-
1
and
args
.
evaluate_during_training
:
# Only evaluate when single GPU otherwise metrics may not average well
results
=
evaluate
(
args
,
model
,
tokenizer
)
for
key
,
value
in
results
.
items
():
eval_key
=
'eval_{}'
.
format
(
key
)
logs
[
eval_key
]
=
value
loss_scalar
=
(
tr_loss
-
logging_loss
)
/
args
.
logging_steps
learning_rate_scalar
=
scheduler
.
get_lr
()[
0
]
logs
[
'learning_rate'
]
=
learning_rate_scalar
logs
[
'loss'
]
=
loss_scalar
logging_loss
=
tr_loss
for
key
,
value
in
logs
.
items
():
tb_writer
.
add_scalar
(
key
,
value
,
global_step
)
#print(json.dumps({**logs, **{'step': global_step}}))
if
args
.
local_rank
in
[
-
1
,
0
]
and
args
.
save_steps
>
0
and
global_step
%
args
.
save_steps
==
0
:
# Save model checkpoint
output_dir
=
os
.
path
.
join
(
args
.
output_dir
,
'checkpoint-{}'
.
format
(
global_step
))
if
not
os
.
path
.
exists
(
output_dir
):
os
.
makedirs
(
output_dir
)
model_to_save
=
model
.
module
if
hasattr
(
model
,
'module'
)
else
model
# Take care of distributed/parallel training
model_to_save
.
save_pretrained
(
output_dir
)
torch
.
save
(
args
,
os
.
path
.
join
(
output_dir
,
'training_args.bin'
))
logger
.
info
(
"Saving model checkpoint to %s"
,
output_dir
)
if
args
.
max_steps
>
0
and
global_step
>
args
.
max_steps
:
epoch_iterator
.
close
()
break
if
args
.
max_steps
>
0
and
global_step
>
args
.
max_steps
:
train_iterator
.
close
()
break
if
args
.
local_rank
in
[
-
1
,
0
]:
tb_writer
.
close
()
return
global_step
,
tr_loss
/
global_step
def
evaluate
(
args
,
model
,
tokenizer
,
prefix
=
""
):
# Loop to handle MNLI double evaluation (matched, mis-matched)
eval_task_names
=
(
"mnli"
,
"mnli-mm"
)
if
args
.
task_name
==
"mnli"
else
(
args
.
task_name
,)
eval_outputs_dirs
=
(
args
.
output_dir
,
args
.
output_dir
+
'-MM'
)
if
args
.
task_name
==
"mnli"
else
(
args
.
output_dir
,)
results
=
{}
for
eval_task
,
eval_output_dir
in
zip
(
eval_task_names
,
eval_outputs_dirs
):
eval_dataset
,
label_list
=
load_and_cache_examples
(
args
,
eval_task
,
tokenizer
,
evaluate
=
True
)
if
not
os
.
path
.
exists
(
eval_output_dir
)
and
args
.
local_rank
in
[
-
1
,
0
]:
os
.
makedirs
(
eval_output_dir
)
args
.
eval_batch_size
=
args
.
per_gpu_eval_batch_size
*
max
(
1
,
args
.
n_gpu
)
# Note that DistributedSampler samples randomly
eval_sampler
=
SequentialSampler
(
eval_dataset
)
eval_dataloader
=
DataLoader
(
eval_dataset
,
sampler
=
eval_sampler
,
batch_size
=
args
.
eval_batch_size
)
# multi-gpu eval
if
args
.
n_gpu
>
1
:
model
=
torch
.
nn
.
DataParallel
(
model
)
# Eval!
logger
.
info
(
"***** Running evaluation {} *****"
.
format
(
prefix
))
logger
.
info
(
" Num examples = %d"
,
len
(
eval_dataset
))
logger
.
info
(
" Batch size = %d"
,
args
.
eval_batch_size
)
eval_loss
=
0.0
nb_eval_steps
=
0
preds
=
None
out_label_ids
=
None
for
batch
in
tqdm
(
eval_dataloader
,
desc
=
"Evaluating"
):
model
.
eval
()
batch
=
tuple
(
t
.
to
(
args
.
device
)
for
t
in
batch
)
with
torch
.
no_grad
():
inputs
=
{
'input_ids'
:
batch
[
0
],
'attention_mask'
:
batch
[
1
],
'labels'
:
batch
[
3
]}
if
args
.
model_type
!=
'distilbert'
:
inputs
[
'token_type_ids'
]
=
batch
[
2
]
if
args
.
model_type
in
[
'bert'
,
'xlnet'
]
else
None
# XLM, DistilBERT and RoBERTa don't use segment_ids
outputs
=
model
(
**
inputs
)
tmp_eval_loss
,
logits
=
outputs
[:
2
]
eval_loss
+=
tmp_eval_loss
.
mean
().
item
()
nb_eval_steps
+=
1
if
preds
is
None
:
preds
=
logits
.
detach
().
cpu
().
numpy
()
out_label_ids
=
inputs
[
'labels'
].
detach
().
cpu
().
numpy
()
pair_ids
=
batch
[
4
].
detach
().
cpu
().
numpy
()
else
:
preds
=
np
.
append
(
preds
,
logits
.
detach
().
cpu
().
numpy
(),
axis
=
0
)
out_label_ids
=
np
.
append
(
out_label_ids
,
inputs
[
'labels'
].
detach
().
cpu
().
numpy
(),
axis
=
0
)
pair_ids
=
np
.
append
(
pair_ids
,
batch
[
4
].
detach
().
cpu
().
numpy
(),
axis
=
0
)
eval_loss
=
eval_loss
/
nb_eval_steps
if
args
.
output_mode
==
"classification"
:
preds
=
np
.
argmax
(
preds
,
axis
=
1
)
elif
args
.
output_mode
==
"regression"
:
preds
=
np
.
squeeze
(
preds
)
output_eval_file
=
os
.
path
.
join
(
eval_output_dir
,
"hans_predictions.txt"
)
with
open
(
output_eval_file
,
"w"
)
as
writer
:
writer
.
write
(
"pairID,gld_label
\n
"
)
for
pid
,
pred
in
zip
(
pair_ids
,
preds
):
writer
.
write
(
'ex'
+
str
(
pid
)
+
','
+
label_list
[
int
(
pred
)]
+
'
\n
'
)
return
results
def
load_and_cache_examples
(
args
,
task
,
tokenizer
,
evaluate
=
False
):
if
args
.
local_rank
not
in
[
-
1
,
0
]
and
not
evaluate
:
torch
.
distributed
.
barrier
()
# Make sure only the first process in distributed training process the dataset, and the others will use the cache
processor
=
processors
[
task
]()
output_mode
=
output_modes
[
task
]
# Load data features from cache or dataset file
cached_features_file
=
os
.
path
.
join
(
args
.
data_dir
,
'cached_{}_{}_{}_{}'
.
format
(
'dev'
if
evaluate
else
'train'
,
list
(
filter
(
None
,
args
.
model_name_or_path
.
split
(
'/'
))).
pop
(),
str
(
args
.
max_seq_length
),
str
(
task
)))
label_list
=
processor
.
get_labels
()
if
os
.
path
.
exists
(
cached_features_file
)
and
not
args
.
overwrite_cache
:
logger
.
info
(
"Loading features from cached file %s"
,
cached_features_file
)
features
=
torch
.
load
(
cached_features_file
)
else
:
logger
.
info
(
"Creating features from dataset file at %s"
,
args
.
data_dir
)
if
task
in
[
'mnli'
,
'mnli-mm'
]
and
args
.
model_type
in
[
'roberta'
]:
# HACK(label indices are swapped in RoBERTa pretrained model)
label_list
[
1
],
label_list
[
2
]
=
label_list
[
2
],
label_list
[
1
]
examples
=
processor
.
get_dev_examples
(
args
.
data_dir
)
if
evaluate
else
processor
.
get_train_examples
(
args
.
data_dir
)
features
=
convert_examples_to_features
(
examples
,
tokenizer
,
label_list
=
label_list
,
max_length
=
args
.
max_seq_length
,
output_mode
=
output_mode
,
pad_on_left
=
bool
(
args
.
model_type
in
[
'xlnet'
]),
# pad on the left for xlnet
pad_token
=
tokenizer
.
convert_tokens_to_ids
([
tokenizer
.
pad_token
])[
0
],
pad_token_segment_id
=
4
if
args
.
model_type
in
[
'xlnet'
]
else
0
,
)
if
args
.
local_rank
in
[
-
1
,
0
]:
logger
.
info
(
"Saving features into cached file %s"
,
cached_features_file
)
torch
.
save
(
features
,
cached_features_file
)
if
args
.
local_rank
==
0
and
not
evaluate
:
torch
.
distributed
.
barrier
()
# Make sure only the first process in distributed training process the dataset, and the others will use the cache
# Convert to Tensors and build dataset
all_input_ids
=
torch
.
tensor
([
f
.
input_ids
for
f
in
features
],
dtype
=
torch
.
long
)
all_attention_mask
=
torch
.
tensor
([
f
.
attention_mask
for
f
in
features
],
dtype
=
torch
.
long
)
all_token_type_ids
=
torch
.
tensor
([
f
.
token_type_ids
for
f
in
features
],
dtype
=
torch
.
long
)
if
output_mode
==
"classification"
:
all_labels
=
torch
.
tensor
([
f
.
label
for
f
in
features
],
dtype
=
torch
.
long
)
elif
output_mode
==
"regression"
:
all_labels
=
torch
.
tensor
([
f
.
label
for
f
in
features
],
dtype
=
torch
.
float
)
all_pair_ids
=
torch
.
tensor
([
int
(
f
.
pairID
)
for
f
in
features
],
dtype
=
torch
.
long
)
dataset
=
TensorDataset
(
all_input_ids
,
all_attention_mask
,
all_token_type_ids
,
all_labels
,
all_pair_ids
)
return
dataset
,
label_list
def
main
():
parser
=
argparse
.
ArgumentParser
()
## Required parameters
parser
.
add_argument
(
"--data_dir"
,
default
=
None
,
type
=
str
,
required
=
True
,
help
=
"The input data dir. Should contain the .tsv files (or other data files) for the task."
)
parser
.
add_argument
(
"--model_type"
,
default
=
None
,
type
=
str
,
required
=
True
,
help
=
"Model type selected in the list: "
+
", "
.
join
(
MODEL_CLASSES
.
keys
()))
parser
.
add_argument
(
"--model_name_or_path"
,
default
=
None
,
type
=
str
,
required
=
True
,
help
=
"Path to pre-trained model or shortcut name selected in the list: "
+
", "
.
join
(
ALL_MODELS
))
parser
.
add_argument
(
"--task_name"
,
default
=
None
,
type
=
str
,
required
=
True
,
help
=
"The name of the task to train selected in the list: "
+
", "
.
join
(
processors
.
keys
()))
parser
.
add_argument
(
"--output_dir"
,
default
=
None
,
type
=
str
,
required
=
True
,
help
=
"The output directory where the model predictions and checkpoints will be written."
)
## Other parameters
parser
.
add_argument
(
"--config_name"
,
default
=
""
,
type
=
str
,
help
=
"Pretrained config name or path if not the same as model_name"
)
parser
.
add_argument
(
"--tokenizer_name"
,
default
=
""
,
type
=
str
,
help
=
"Pretrained tokenizer name or path if not the same as model_name"
)
parser
.
add_argument
(
"--cache_dir"
,
default
=
""
,
type
=
str
,
help
=
"Where do you want to store the pre-trained models downloaded from s3"
)
parser
.
add_argument
(
"--max_seq_length"
,
default
=
128
,
type
=
int
,
help
=
"The maximum total input sequence length after tokenization. Sequences longer "
"than this will be truncated, sequences shorter will be padded."
)
parser
.
add_argument
(
"--do_train"
,
action
=
'store_true'
,
help
=
"Whether to run training."
)
parser
.
add_argument
(
"--do_eval"
,
action
=
'store_true'
,
help
=
"Whether to run eval on the dev set."
)
parser
.
add_argument
(
"--evaluate_during_training"
,
action
=
'store_true'
,
help
=
"Rul evaluation during training at each logging step."
)
parser
.
add_argument
(
"--do_lower_case"
,
action
=
'store_true'
,
help
=
"Set this flag if you are using an uncased model."
)
parser
.
add_argument
(
"--per_gpu_train_batch_size"
,
default
=
8
,
type
=
int
,
help
=
"Batch size per GPU/CPU for training."
)
parser
.
add_argument
(
"--per_gpu_eval_batch_size"
,
default
=
8
,
type
=
int
,
help
=
"Batch size per GPU/CPU for evaluation."
)
parser
.
add_argument
(
'--gradient_accumulation_steps'
,
type
=
int
,
default
=
1
,
help
=
"Number of updates steps to accumulate before performing a backward/update pass."
)
parser
.
add_argument
(
"--learning_rate"
,
default
=
5e-5
,
type
=
float
,
help
=
"The initial learning rate for Adam."
)
parser
.
add_argument
(
"--weight_decay"
,
default
=
0.0
,
type
=
float
,
help
=
"Weight decay if we apply some."
)
parser
.
add_argument
(
"--adam_epsilon"
,
default
=
1e-8
,
type
=
float
,
help
=
"Epsilon for Adam optimizer."
)
parser
.
add_argument
(
"--max_grad_norm"
,
default
=
1.0
,
type
=
float
,
help
=
"Max gradient norm."
)
parser
.
add_argument
(
"--num_train_epochs"
,
default
=
3.0
,
type
=
float
,
help
=
"Total number of training epochs to perform."
)
parser
.
add_argument
(
"--max_steps"
,
default
=-
1
,
type
=
int
,
help
=
"If > 0: set total number of training steps to perform. Override num_train_epochs."
)
parser
.
add_argument
(
"--warmup_steps"
,
default
=
0
,
type
=
int
,
help
=
"Linear warmup over warmup_steps."
)
parser
.
add_argument
(
'--logging_steps'
,
type
=
int
,
default
=
50
,
help
=
"Log every X updates steps."
)
parser
.
add_argument
(
'--save_steps'
,
type
=
int
,
default
=
50
,
help
=
"Save checkpoint every X updates steps."
)
parser
.
add_argument
(
"--eval_all_checkpoints"
,
action
=
'store_true'
,
help
=
"Evaluate all checkpoints starting with the same prefix as model_name ending and ending with step number"
)
parser
.
add_argument
(
"--no_cuda"
,
action
=
'store_true'
,
help
=
"Avoid using CUDA when available"
)
parser
.
add_argument
(
'--overwrite_output_dir'
,
action
=
'store_true'
,
help
=
"Overwrite the content of the output directory"
)
parser
.
add_argument
(
'--overwrite_cache'
,
action
=
'store_true'
,
help
=
"Overwrite the cached training and evaluation sets"
)
parser
.
add_argument
(
'--seed'
,
type
=
int
,
default
=
42
,
help
=
"random seed for initialization"
)
parser
.
add_argument
(
'--fp16'
,
action
=
'store_true'
,
help
=
"Whether to use 16-bit (mixed) precision (through NVIDIA apex) instead of 32-bit"
)
parser
.
add_argument
(
'--fp16_opt_level'
,
type
=
str
,
default
=
'O1'
,
help
=
"For fp16: Apex AMP optimization level selected in ['O0', 'O1', 'O2', and 'O3']."
"See details at https://nvidia.github.io/apex/amp.html"
)
parser
.
add_argument
(
"--local_rank"
,
type
=
int
,
default
=-
1
,
help
=
"For distributed training: local_rank"
)
parser
.
add_argument
(
'--server_ip'
,
type
=
str
,
default
=
''
,
help
=
"For distant debugging."
)
parser
.
add_argument
(
'--server_port'
,
type
=
str
,
default
=
''
,
help
=
"For distant debugging."
)
args
=
parser
.
parse_args
()
if
os
.
path
.
exists
(
args
.
output_dir
)
and
os
.
listdir
(
args
.
output_dir
)
and
args
.
do_train
and
not
args
.
overwrite_output_dir
:
raise
ValueError
(
"Output directory ({}) already exists and is not empty. Use --overwrite_output_dir to overcome."
.
format
(
args
.
output_dir
))
# Setup distant debugging if needed
if
args
.
server_ip
and
args
.
server_port
:
# Distant debugging - see https://code.visualstudio.com/docs/python/debugging#_attach-to-a-local-script
import
ptvsd
print
(
"Waiting for debugger attach"
)
ptvsd
.
enable_attach
(
address
=
(
args
.
server_ip
,
args
.
server_port
),
redirect_output
=
True
)
ptvsd
.
wait_for_attach
()
# Setup CUDA, GPU & distributed training
if
args
.
local_rank
==
-
1
or
args
.
no_cuda
:
device
=
torch
.
device
(
"cuda"
if
torch
.
cuda
.
is_available
()
and
not
args
.
no_cuda
else
"cpu"
)
args
.
n_gpu
=
torch
.
cuda
.
device_count
()
else
:
# Initializes the distributed backend which will take care of sychronizing nodes/GPUs
torch
.
cuda
.
set_device
(
args
.
local_rank
)
device
=
torch
.
device
(
"cuda"
,
args
.
local_rank
)
torch
.
distributed
.
init_process_group
(
backend
=
'nccl'
)
args
.
n_gpu
=
1
args
.
device
=
device
# Setup logging
logging
.
basicConfig
(
format
=
'%(asctime)s - %(levelname)s - %(name)s - %(message)s'
,
datefmt
=
'%m/%d/%Y %H:%M:%S'
,
level
=
logging
.
INFO
if
args
.
local_rank
in
[
-
1
,
0
]
else
logging
.
WARN
)
logger
.
warning
(
"Process rank: %s, device: %s, n_gpu: %s, distributed training: %s, 16-bits training: %s"
,
args
.
local_rank
,
device
,
args
.
n_gpu
,
bool
(
args
.
local_rank
!=
-
1
),
args
.
fp16
)
# Set seed
set_seed
(
args
)
# Prepare GLUE task
args
.
task_name
=
args
.
task_name
.
lower
()
if
args
.
task_name
not
in
processors
:
raise
ValueError
(
"Task not found: %s"
%
(
args
.
task_name
))
processor
=
processors
[
args
.
task_name
]()
args
.
output_mode
=
output_modes
[
args
.
task_name
]
label_list
=
processor
.
get_labels
()
num_labels
=
len
(
label_list
)
# Load pretrained model and tokenizer
if
args
.
local_rank
not
in
[
-
1
,
0
]:
torch
.
distributed
.
barrier
()
# Make sure only the first process in distributed training will download model & vocab
args
.
model_type
=
args
.
model_type
.
lower
()
config_class
,
model_class
,
tokenizer_class
=
MODEL_CLASSES
[
args
.
model_type
]
config
=
config_class
.
from_pretrained
(
args
.
config_name
if
args
.
config_name
else
args
.
model_name_or_path
,
num_labels
=
num_labels
,
finetuning_task
=
args
.
task_name
,
cache_dir
=
args
.
cache_dir
if
args
.
cache_dir
else
None
)
tokenizer
=
tokenizer_class
.
from_pretrained
(
args
.
tokenizer_name
if
args
.
tokenizer_name
else
args
.
model_name_or_path
,
do_lower_case
=
args
.
do_lower_case
,
cache_dir
=
args
.
cache_dir
if
args
.
cache_dir
else
None
)
model
=
model_class
.
from_pretrained
(
args
.
model_name_or_path
,
from_tf
=
bool
(
'.ckpt'
in
args
.
model_name_or_path
),
config
=
config
,
cache_dir
=
args
.
cache_dir
if
args
.
cache_dir
else
None
)
if
args
.
local_rank
==
0
:
torch
.
distributed
.
barrier
()
# Make sure only the first process in distributed training will download model & vocab
model
.
to
(
args
.
device
)
logger
.
info
(
"Training/evaluation parameters %s"
,
args
)
# Training
if
args
.
do_train
:
train_dataset
,
_
=
load_and_cache_examples
(
args
,
args
.
task_name
,
tokenizer
,
evaluate
=
False
)
global_step
,
tr_loss
=
train
(
args
,
train_dataset
,
model
,
tokenizer
)
logger
.
info
(
" global_step = %s, average loss = %s"
,
global_step
,
tr_loss
)
# Saving best-practices: if you use defaults names for the model, you can reload it using from_pretrained()
if
args
.
do_train
and
(
args
.
local_rank
==
-
1
or
torch
.
distributed
.
get_rank
()
==
0
):
# Create output directory if needed
if
not
os
.
path
.
exists
(
args
.
output_dir
)
and
args
.
local_rank
in
[
-
1
,
0
]:
os
.
makedirs
(
args
.
output_dir
)
logger
.
info
(
"Saving model checkpoint to %s"
,
args
.
output_dir
)
# Save a trained model, configuration and tokenizer using `save_pretrained()`.
# They can then be reloaded using `from_pretrained()`
model_to_save
=
model
.
module
if
hasattr
(
model
,
'module'
)
else
model
# Take care of distributed/parallel training
model_to_save
.
save_pretrained
(
args
.
output_dir
)
tokenizer
.
save_pretrained
(
args
.
output_dir
)
# Good practice: save your training arguments together with the trained model
torch
.
save
(
args
,
os
.
path
.
join
(
args
.
output_dir
,
'training_args.bin'
))
# Load a trained model and vocabulary that you have fine-tuned
model
=
model_class
.
from_pretrained
(
args
.
output_dir
)
tokenizer
=
tokenizer_class
.
from_pretrained
(
args
.
output_dir
)
model
.
to
(
args
.
device
)
# Evaluation
results
=
{}
if
args
.
do_eval
and
args
.
local_rank
in
[
-
1
,
0
]:
tokenizer
=
tokenizer_class
.
from_pretrained
(
args
.
output_dir
,
do_lower_case
=
args
.
do_lower_case
)
checkpoints
=
[
args
.
output_dir
]
if
args
.
eval_all_checkpoints
:
checkpoints
=
list
(
os
.
path
.
dirname
(
c
)
for
c
in
sorted
(
glob
.
glob
(
args
.
output_dir
+
'/**/'
+
WEIGHTS_NAME
,
recursive
=
True
)))
logging
.
getLogger
(
"transformers.modeling_utils"
).
setLevel
(
logging
.
WARN
)
# Reduce logging
logger
.
info
(
"Evaluate the following checkpoints: %s"
,
checkpoints
)
for
checkpoint
in
checkpoints
:
global_step
=
checkpoint
.
split
(
'-'
)[
-
1
]
if
len
(
checkpoints
)
>
1
else
""
prefix
=
checkpoint
.
split
(
'/'
)[
-
1
]
if
checkpoint
.
find
(
'checkpoint'
)
!=
-
1
else
""
model
=
model_class
.
from_pretrained
(
checkpoint
)
model
.
to
(
args
.
device
)
result
=
evaluate
(
args
,
model
,
tokenizer
,
prefix
=
prefix
)
result
=
dict
((
k
+
'_{}'
.
format
(
global_step
),
v
)
for
k
,
v
in
result
.
items
())
results
.
update
(
result
)
return
results
if
__name__
==
"__main__"
:
main
()
examples/utils_hans.py
0 → 100644
View file @
99d45155
# coding=utf-8
# Copyright 2018 The Google AI Language Team Authors and The HuggingFace Inc. team.
# Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import
csv
import
sys
import
copy
import
json
class
InputExample
(
object
):
"""
A single training/test example for simple sequence classification.
Args:
guid: Unique id for the example.
text_a: string. The untokenized text of the first sequence. For single
sequence tasks, only this sequence must be specified.
text_b: (Optional) string. The untokenized text of the second sequence.
Only must be specified for sequence pair tasks.
label: (Optional) string. The label of the example. This should be
specified for train and dev examples, but not for test examples.
"""
def
__init__
(
self
,
guid
,
text_a
,
text_b
=
None
,
label
=
None
,
pairID
=
None
):
self
.
guid
=
guid
self
.
text_a
=
text_a
self
.
text_b
=
text_b
self
.
label
=
label
self
.
pairID
=
pairID
def
__repr__
(
self
):
return
str
(
self
.
to_json_string
())
def
to_dict
(
self
):
"""Serializes this instance to a Python dictionary."""
output
=
copy
.
deepcopy
(
self
.
__dict__
)
return
output
def
to_json_string
(
self
):
"""Serializes this instance to a JSON string."""
return
json
.
dumps
(
self
.
to_dict
(),
indent
=
2
,
sort_keys
=
True
)
+
"
\n
"
class
InputFeatures
(
object
):
"""
A single set of features of data.
Args:
input_ids: Indices of input sequence tokens in the vocabulary.
attention_mask: Mask to avoid performing attention on padding token indices.
Mask values selected in ``[0, 1]``:
Usually ``1`` for tokens that are NOT MASKED, ``0`` for MASKED (padded) tokens.
token_type_ids: Segment token indices to indicate first and second portions of the inputs.
label: Label corresponding to the input
"""
def
__init__
(
self
,
input_ids
,
attention_mask
,
token_type_ids
,
label
,
pairID
=
None
):
self
.
input_ids
=
input_ids
self
.
attention_mask
=
attention_mask
self
.
token_type_ids
=
token_type_ids
self
.
label
=
label
self
.
pairID
=
pairID
def
__repr__
(
self
):
return
str
(
self
.
to_json_string
())
def
to_dict
(
self
):
"""Serializes this instance to a Python dictionary."""
output
=
copy
.
deepcopy
(
self
.
__dict__
)
return
output
def
to_json_string
(
self
):
"""Serializes this instance to a JSON string."""
return
json
.
dumps
(
self
.
to_dict
(),
indent
=
2
,
sort_keys
=
True
)
+
"
\n
"
class
DataProcessor
(
object
):
"""Base class for data converters for sequence classification data sets."""
def
get_example_from_tensor_dict
(
self
,
tensor_dict
):
"""Gets an example from a dict with tensorflow tensors
Args:
tensor_dict: Keys and values should match the corresponding Glue
tensorflow_dataset examples.
"""
raise
NotImplementedError
()
def
get_train_examples
(
self
,
data_dir
):
"""Gets a collection of `InputExample`s for the train set."""
raise
NotImplementedError
()
def
get_dev_examples
(
self
,
data_dir
):
"""Gets a collection of `InputExample`s for the dev set."""
raise
NotImplementedError
()
def
get_labels
(
self
):
"""Gets the list of labels for this data set."""
raise
NotImplementedError
()
@
classmethod
def
_read_tsv
(
cls
,
input_file
,
quotechar
=
None
):
"""Reads a tab separated value file."""
with
open
(
input_file
,
"r"
,
encoding
=
"utf-8-sig"
)
as
f
:
reader
=
csv
.
reader
(
f
,
delimiter
=
"
\t
"
,
quotechar
=
quotechar
)
lines
=
[]
for
line
in
reader
:
if
sys
.
version_info
[
0
]
==
2
:
line
=
list
(
unicode
(
cell
,
'utf-8'
)
for
cell
in
line
)
lines
.
append
(
line
)
return
lines
src/transformers/data/metrics/__init__.py
View file @
99d45155
...
...
@@ -72,6 +72,8 @@ if _has_sklearn:
return
{
"acc"
:
simple_accuracy
(
preds
,
labels
)}
elif
task_name
==
"wnli"
:
return
{
"acc"
:
simple_accuracy
(
preds
,
labels
)}
elif
task_name
==
"hans"
:
return
{
"acc"
:
simple_accuracy
(
preds
,
labels
)}
else
:
raise
KeyError
(
task_name
)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment