Repository: chenpangpang/transformers

Commit a75c64d8, authored Aug 26, 2020 by Lysandre
Black 20 release
Parent: e78c1103
Changes: 191
Showing 20 changed files with 197 additions and 93 deletions (+197, -93)
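Two formatting rules account for most of the hunks below: docstring summaries are pulled up onto the opening triple quotes, and any call, tuple, or literal that already ends in a trailing comma is exploded to one element per line (Black 20.8's "magic trailing comma"). A minimal sketch of the trailing-comma rule, using made-up names rather than code from this commit:

# Hypothetical example (not taken from the commit): the same unpacking
# written before and after Black 20.8's magic-trailing-comma expansion.
def prepare():  # stand-in for a helper like prepare_config_and_inputs_for_common()
    return {"hidden_size": 32}, {"input_ids": [[1, 2, 3]]}

# Old formatting, as on the removed side of the hunks below:
(config, inputs_dict,) = prepare()

# Black 20.8 output: the trailing comma forces one element per line,
# the pattern repeated throughout the test diffs in this commit.
(
    config,
    inputs_dict,
) = prepare()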
src/transformers/tokenization_utils_fast.py          +4   -4
src/transformers/tokenization_xlm.py                 +6   -1
src/transformers/training_args.py                    +4   -2
src/transformers/training_args_tf.py                 +2   -1
templates/adding_a_new_example_script/utils_xxx.py   +3   -3
templates/adding_a_new_model/configuration_xxx.py    +37  -37
templates/adding_a_new_model/modeling_tf_xxx.py      +23  -8
templates/adding_a_new_model/modeling_xxx.py         +18  -10
tests/test_modeling_albert.py                        +2   -1
tests/test_modeling_bart.py                          +22  -6
tests/test_modeling_bert.py                          +4   -1
tests/test_modeling_camembert.py                     +3   -1
tests/test_modeling_common.py                        +37  -9
tests/test_modeling_ctrl.py                          +2   -1
tests/test_modeling_distilbert.py                    +3   -1
tests/test_modeling_dpr.py                           +13  -2
tests/test_modeling_electra.py                       +2   -1
tests/test_modeling_encoder_decoder.py               +8   -2
tests/test_modeling_flaubert.py                      +2   -1
tests/test_modeling_gpt2.py                          +2   -1
src/transformers/tokenization_utils_fast.py (+4, -4)

@@ -139,12 +139,12 @@ class PreTrainedTokenizerFast(PreTrainedTokenizerBase):
         return_length: bool = False,
         verbose: bool = True,
     ) -> Dict[str, Any]:
-        """
-        Convert the encoding representation (from low-level HuggingFace tokenizer output) to a python Dict.
+        """Convert the encoding representation (from low-level HuggingFace tokenizer output) to a python Dict.
         Overflowing tokens are converted to additional examples (like batches) so the output values of
         the dict are lists (overflows) of lists (tokens).
         Output shape: (overflows, sequence length)
         """
         if return_token_type_ids is None:
             return_token_type_ids = "token_type_ids" in self.model_input_names
src/transformers/tokenization_xlm.py (+6, -1)

@@ -902,7 +902,12 @@ class XLMTokenizer(PreTrainedTokenizer):
                 "You should not supply a second sequence if the provided sequence of "
                 "ids is already formated with special tokens for the model."
             )
-            return list(map(lambda x: 1 if x in [self.sep_token_id, self.cls_token_id] else 0, token_ids_0,))
+            return list(
+                map(
+                    lambda x: 1 if x in [self.sep_token_id, self.cls_token_id] else 0,
+                    token_ids_0,
+                )
+            )
         if token_ids_1 is not None:
             return [1] + ([0] * len(token_ids_0)) + [1] + ([0] * len(token_ids_1)) + [1]
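For context on the reformatted return values above: the special-tokens mask built here marks special-token positions with 1 and sequence tokens with 0, matching XLM's <s> A </s> B </s> pair layout. A small illustration with made-up token ids (not part of the commit):

# Illustration only; the token ids are invented.
token_ids_0 = [17, 42, 7]  # hypothetical ids for sequence A
token_ids_1 = [99, 3]      # hypothetical ids for sequence B

# Mirrors the paired-sequence return shown in the hunk above.
mask = [1] + ([0] * len(token_ids_0)) + [1] + ([0] * len(token_ids_1)) + [1]
print(mask)  # [1, 0, 0, 0, 1, 0, 0, 1]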
src/transformers/training_args.py (+4, -2)

@@ -141,10 +141,12 @@ class TrainingArguments:
     do_eval: bool = field(default=False, metadata={"help": "Whether to run eval on the dev set."})
     do_predict: bool = field(default=False, metadata={"help": "Whether to run predictions on the test set."})
     evaluate_during_training: bool = field(
-        default=False, metadata={"help": "Run evaluation during training at each logging step."},
+        default=False,
+        metadata={"help": "Run evaluation during training at each logging step."},
     )
     prediction_loss_only: bool = field(
-        default=False, metadata={"help": "When performing evaluation and predictions, only returns the loss."},
+        default=False,
+        metadata={"help": "When performing evaluation and predictions, only returns the loss."},
     )
     per_device_train_batch_size: int = field(
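The field(default=..., metadata={"help": ...}) declarations reformatted here are consumed by HfArgumentParser, which turns each dataclass field into a command-line flag and uses the metadata string as its help text. A hedged sketch of that usage, not part of the commit (requires transformers, and PyTorch for TrainingArguments; the paths and flags are made-up example values):

# Hedged sketch: how the reformatted dataclass fields are typically consumed.
from transformers import HfArgumentParser, TrainingArguments

parser = HfArgumentParser(TrainingArguments)
(training_args,) = parser.parse_args_into_dataclasses(
    args=["--output_dir", "/tmp/example_run", "--do_eval"]
)
print(training_args.do_eval)  # True, parsed from the flag declared via field()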
src/transformers/training_args_tf.py (+2, -1)

@@ -100,7 +100,8 @@ class TFTrainingArguments(TrainingArguments):
     """

     tpu_name: str = field(
-        default=None, metadata={"help": "Name of TPU"},
+        default=None,
+        metadata={"help": "Name of TPU"},
     )

     @cached_property
templates/adding_a_new_example_script/utils_xxx.py (+3, -3)

@@ -703,10 +703,10 @@ def write_predictions_extended(
     tokenizer,
     verbose_logging,
 ):
-    """
-    XLNet write prediction logic (more complex than Bert's).
+    """XLNet write prediction logic (more complex than Bert's).
     Write final predictions to the json file and log-odds of null if needed.
     Requires utils_squad_evaluate.py
     """
     _PrelimPrediction = collections.namedtuple(  # pylint: disable=invalid-name
         "PrelimPrediction", ["feature_index", "start_index", "end_index", "start_log_prob", "end_log_prob"]
templates/adding_a_new_model/configuration_xxx.py (+37, -37)

@@ -31,47 +31,47 @@ XXX_PRETRAINED_CONFIG_ARCHIVE_MAP = {
 class XxxConfig(PretrainedConfig):
     r"""
     This is the configuration class to store the configuration of a :class:`~transformers.XXXModel`.
     It is used to instantiate a XXX model according to the specified arguments, defining the model
     architecture. Instantiating a configuration with the defaults will yield a similar configuration to that of
     the XXX `xxx-base-uncased <https://huggingface.co/xxx/xxx-base-uncased>`__ architecture.
     Configuration objects inherit from :class:`~transformers.PretrainedConfig` and can be used
     to control the model outputs. Read the documentation from :class:`~transformers.PretrainedConfig`
     for more information.
     Args:
         vocab_size (:obj:`int`, optional, defaults to 30522):
             Vocabulary size of the XXX model. Defines the different tokens that
             can be represented by the `inputs_ids` passed to the forward method of :class:`~transformers.XXXModel`.
         hidden_size (:obj:`int`, optional, defaults to 768):
             Dimensionality of the encoder layers and the pooler layer.
         num_hidden_layers (:obj:`int`, optional, defaults to 12):
             Number of hidden layers in the Transformer encoder.
         num_attention_heads (:obj:`int`, optional, defaults to 12):
             Number of attention heads for each attention layer in the Transformer encoder.
         hidden_act (:obj:`str` or :obj:`function`, optional, defaults to :obj:`"gelu"`):
             The non-linear activation function (function or string) in the encoder and pooler.
             If string, :obj:`"gelu"`, :obj:`"relu"`, :obj:`"swish"` and :obj:`"gelu_new"` are supported.
         hidden_dropout_prob (:obj:`float`, optional, defaults to 0.1):
             The dropout probabilitiy for all fully connected layers in the embeddings, encoder, and pooler.
         attention_probs_dropout_prob (:obj:`float`, optional, defaults to 0.1):
             The dropout ratio for the attention probabilities.
         max_position_embeddings (:obj:`int`, optional, defaults to 512):
             The maximum sequence length that this model might ever be used with.
             Typically set this to something large just in case (e.g., 512 or 1024 or 2048).
         type_vocab_size (:obj:`int`, optional, defaults to 2):
             The vocabulary size of the `token_type_ids` passed into :class:`~transformers.BertModel`.
         initializer_range (:obj:`float`, optional, defaults to 0.02):
             The standard deviation of the :obj:`truncated_normal_initializer` for initializing all weight matrices.
         layer_norm_eps (:obj:`float`, optional, defaults to 1e-5):
             The epsilon used by the layer normalization layers.
         gradient_checkpointing (:obj:`bool`, optional, defaults to :obj:`False`):
             If :obj:`True`, use gradient checkpointing to save memory at the expense of slower backward pass.
         kwargs:
             Additional arguments for common configurations, passed to :class:`~transformers.PretrainedConfig`.
     """

     model_type = "xxx"
templates/adding_a_new_model/modeling_tf_xxx.py (+23, -8)

@@ -223,7 +223,10 @@ class TFXxxMainLayer(tf.keras.layers.Layer):
         pooled_output = self.pooler(sequence_output)
         if not return_dict:
-            return (sequence_output, pooled_output,) + encoder_outputs[1:]
+            return (
+                sequence_output,
+                pooled_output,
+            ) + encoder_outputs[1:]
         return TFBaseModelOutputWithPooling(
             last_hidden_state=sequence_output,

@@ -241,8 +244,8 @@ class TFXxxMainLayer(tf.keras.layers.Layer):
 # pointers for your model.
 ####################################################
 class TFXxxPreTrainedModel(TFPreTrainedModel):
-    """
-    An abstract class to handle weights initialization and
+    """An abstract class to handle weights initialization and
     a simple interface for downloading and loading pretrained models.
     """
     config_class = XxxConfig

@@ -422,7 +425,10 @@ class TFXxxForMaskedLM(TFXxxPreTrainedModel, TFMaskedLanguageModelingLoss):
             return ((loss,) + output) if loss is not None else output
         return TFMaskedLMOutput(
-            loss=loss, logits=prediction_scores, hidden_states=outputs.hidden_states, attentions=outputs.attentions,
+            loss=loss,
+            logits=prediction_scores,
+            hidden_states=outputs.hidden_states,
+            attentions=outputs.attentions,
         )

@@ -503,7 +509,10 @@ class TFXxxForSequenceClassification(TFXxxPreTrainedModel, TFSequenceClassificat
             return ((loss,) + output) if loss is not None else output
         return TFSequenceClassifierOutput(
-            loss=loss, logits=logits, hidden_states=outputs.hidden_states, attentions=outputs.attentions,
+            loss=loss,
+            logits=logits,
+            hidden_states=outputs.hidden_states,
+            attentions=outputs.attentions,
         )

@@ -524,7 +533,7 @@ class TFXxxForMultipleChoice(TFXxxPreTrainedModel, TFMultipleChoiceLoss):
     @property
     def dummy_inputs(self):
-        """
-        Dummy inputs to build the network.
+        """Dummy inputs to build the network.
         Returns:
             tf.Tensor with dummy inputs

@@ -631,7 +640,10 @@ class TFXxxForMultipleChoice(TFXxxPreTrainedModel, TFMultipleChoiceLoss):
             return ((loss,) + output) if loss is not None else output
         return TFMultipleChoiceModelOutput(
-            loss=loss, logits=reshaped_logits, hidden_states=outputs.hidden_states, attentions=outputs.attentions,
+            loss=loss,
+            logits=reshaped_logits,
+            hidden_states=outputs.hidden_states,
+            attentions=outputs.attentions,
         )

@@ -710,7 +722,10 @@ class TFXxxForTokenClassification(TFXxxPreTrainedModel, TFTokenClassificationLos
             return ((loss,) + output) if loss is not None else output
         return TFTokenClassifierOutput(
-            loss=loss, logits=logits, hidden_states=outputs.hidden_states, attentions=outputs.attentions,
+            loss=loss,
+            logits=logits,
+            hidden_states=outputs.hidden_states,
+            attentions=outputs.attentions,
         )
templates/adding_a_new_model/modeling_xxx.py (+18, -10)

@@ -59,8 +59,7 @@ XXX_PRETRAINED_MODEL_ARCHIVE_LIST = [
 # More details: https://medium.com/huggingface/from-tensorflow-to-pytorch-265f40ef2a28
 ####################################################
 def load_tf_weights_in_xxx(model, config, tf_checkpoint_path):
-    """ Load tf checkpoints in a pytorch model.
-    """
+    """Load tf checkpoints in a pytorch model."""
     try:
         import re

@@ -189,8 +188,8 @@ XxxPooler = nn.Module
 class XxxPreTrainedModel(PreTrainedModel):
-    """
-    An abstract class to handle weights initialization and
+    """An abstract class to handle weights initialization and
     a simple interface for downloading and loading pretrained models.
     """
     config_class = XxxConfig

@@ -290,9 +289,9 @@ class XxxModel(XxxPreTrainedModel):
         self.embeddings.word_embeddings = new_embeddings
     def _prune_heads(self, heads_to_prune):
-        """
-        Prunes heads of the model.
+        """Prunes heads of the model.
         heads_to_prune: dict of {layer_num: list of heads to prune in this layer}
         See base class PreTrainedModel
         """
         for layer, heads in heads_to_prune.items():
             self.encoder.layer[layer].attention.prune_heads(heads)

@@ -517,7 +516,10 @@ class XxxForSequenceClassification(XxxPreTrainedModel):
             return ((loss,) + output) if loss is not None else output
         return SequenceClassifierOutput(
-            loss=loss, logits=logits, hidden_states=outputs.hidden_states, attentions=outputs.attentions,
+            loss=loss,
+            logits=logits,
+            hidden_states=outputs.hidden_states,
+            attentions=outputs.attentions,
         )

@@ -603,7 +605,10 @@ class XxxForMultipleChoice(XxxPreTrainedModel):
             return ((loss,) + output) if loss is not None else output
         return MultipleChoiceModelOutput(
-            loss=loss, logits=reshaped_logits, hidden_states=outputs.hidden_states, attentions=outputs.attentions,
+            loss=loss,
+            logits=reshaped_logits,
+            hidden_states=outputs.hidden_states,
+            attentions=outputs.attentions,
         )

@@ -686,7 +691,10 @@ class XxxForTokenClassification(XxxPreTrainedModel):
             return ((loss,) + output) if loss is not None else output
         return TokenClassifierOutput(
-            loss=loss, logits=logits, hidden_states=outputs.hidden_states, attentions=outputs.attentions,
+            loss=loss,
+            logits=logits,
+            hidden_states=outputs.hidden_states,
+            attentions=outputs.attentions,
         )
tests/test_modeling_albert.py (+2, -1)

@@ -39,7 +39,8 @@ if is_torch_available():
 class AlbertModelTester:
     def __init__(
-        self, parent,
+        self,
+        parent,
     ):
         self.parent = parent
         self.batch_size = 13
tests/test_modeling_bart.py (+22, -6)

@@ -54,7 +54,8 @@ PGE_ARTICLE = """ PG&E stated it scheduled the blackouts in response to forecast
 @require_torch
 class ModelTester:
     def __init__(
-        self, parent,
+        self,
+        parent,
     ):
         self.parent = parent
         self.batch_size = 13

@@ -76,7 +77,9 @@ class ModelTester:
         torch.manual_seed(0)
     def prepare_config_and_inputs_for_common(self):
-        input_ids = ids_tensor([self.batch_size, self.seq_length], self.vocab_size).clamp(3,)
+        input_ids = ids_tensor([self.batch_size, self.seq_length], self.vocab_size).clamp(
+            3,
+        )
         input_ids[:, -1] = 2  # Eos Token
         config = BartConfig(

@@ -100,7 +103,9 @@ class ModelTester:
 def prepare_bart_inputs_dict(
-    config, input_ids, attention_mask=None,
+    config,
+    input_ids,
+    attention_mask=None,
 ):
     if attention_mask is None:
         attention_mask = input_ids.ne(config.pad_token_id)

@@ -261,7 +266,11 @@ class BartHeadTests(unittest.TestCase):
         sequence_labels = ids_tensor([batch_size], 2).to(torch_device)
         model = BartForQuestionAnswering(config)
         model.to(torch_device)
-        outputs = model(input_ids=input_ids, start_positions=sequence_labels, end_positions=sequence_labels,)
+        outputs = model(
+            input_ids=input_ids,
+            start_positions=sequence_labels,
+            end_positions=sequence_labels,
+        )
         self.assertEqual(outputs["start_logits"].shape, input_ids.shape)
         self.assertEqual(outputs["end_logits"].shape, input_ids.shape)

@@ -491,7 +500,11 @@ class BartModelIntegrationTests(unittest.TestCase):
         EXPECTED_SUMMARY = "California's largest power company has begun shutting off electricity to thousands of customers in the state."
         dct = tok.batch_encode_plus(
-            [PGE_ARTICLE], max_length=1024, padding="max_length", truncation=True, return_tensors="pt",
+            [PGE_ARTICLE],
+            max_length=1024,
+            padding="max_length",
+            truncation=True,
+            return_tensors="pt",
         ).to(torch_device)
         hypotheses_batch = model.generate(

@@ -506,7 +519,10 @@ class BartModelIntegrationTests(unittest.TestCase):
             decoder_start_token_id=model.config.eos_token_id,
         )
-        decoded = tok.batch_decode(hypotheses_batch, skip_special_tokens=True,)
+        decoded = tok.batch_decode(
+            hypotheses_batch,
+            skip_special_tokens=True,
+        )
         self.assertEqual(EXPECTED_SUMMARY, decoded[0])
     def test_xsum_config_generation_params(self):
tests/test_modeling_bert.py (+4, -1)

@@ -264,7 +264,10 @@ class BertModelTester:
         model.to(torch_device)
         model.eval()
         result = model(
-            input_ids, attention_mask=input_mask, token_type_ids=token_type_ids, next_sentence_label=sequence_labels,
+            input_ids,
+            attention_mask=input_mask,
+            token_type_ids=token_type_ids,
+            next_sentence_label=sequence_labels,
         )
         self.parent.assertEqual(result.logits.shape, (self.batch_size, 2))
tests/test_modeling_camembert.py (+3, -1)

@@ -33,7 +33,9 @@ class CamembertModelIntegrationTest(unittest.TestCase):
         model.to(torch_device)
         input_ids = torch.tensor(
-            [[5, 121, 11, 660, 16, 730, 25543, 110, 83, 6]], device=torch_device, dtype=torch.long,
+            [[5, 121, 11, 660, 16, 730, 25543, 110, 83, 6]],
+            device=torch_device,
+            dtype=torch.long,
         )  # J'aime le camembert !
         output = model(input_ids)["last_hidden_state"]
         expected_shape = torch.Size((1, 10, 768))
tests/test_modeling_common.py (+37, -9)

@@ -330,7 +330,9 @@ class ModelTesterMixin:
         # Prepare head_mask
         # Set require_grad after having prepared the tensor to avoid error (leaf variable has been moved into the graph interior)
         head_mask = torch.ones(
-            self.model_tester.num_hidden_layers, self.model_tester.num_attention_heads, device=torch_device,
+            self.model_tester.num_hidden_layers,
+            self.model_tester.num_attention_heads,
+            device=torch_device,
         )
         head_mask[0, 0] = 0
         head_mask[-1, :-1] = 0

@@ -370,7 +372,10 @@ class ModelTesterMixin:
             return
         for model_class in self.all_model_classes:
-            (config, inputs_dict,) = self.model_tester.prepare_config_and_inputs_for_common()
+            (
+                config,
+                inputs_dict,
+            ) = self.model_tester.prepare_config_and_inputs_for_common()
             if "head_mask" in inputs_dict:
                 del inputs_dict["head_mask"]

@@ -399,7 +404,10 @@ class ModelTesterMixin:
             return
         for model_class in self.all_model_classes:
-            (config, inputs_dict,) = self.model_tester.prepare_config_and_inputs_for_common()
+            (
+                config,
+                inputs_dict,
+            ) = self.model_tester.prepare_config_and_inputs_for_common()
             if "head_mask" in inputs_dict:
                 del inputs_dict["head_mask"]

@@ -432,7 +440,10 @@ class ModelTesterMixin:
             return
         for model_class in self.all_model_classes:
-            (config, inputs_dict,) = self.model_tester.prepare_config_and_inputs_for_common()
+            (
+                config,
+                inputs_dict,
+            ) = self.model_tester.prepare_config_and_inputs_for_common()
             if "head_mask" in inputs_dict:
                 del inputs_dict["head_mask"]

@@ -463,7 +474,10 @@ class ModelTesterMixin:
             return
         for model_class in self.all_model_classes:
-            (config, inputs_dict,) = self.model_tester.prepare_config_and_inputs_for_common()
+            (
+                config,
+                inputs_dict,
+            ) = self.model_tester.prepare_config_and_inputs_for_common()
             if "head_mask" in inputs_dict:
                 del inputs_dict["head_mask"]

@@ -534,7 +548,8 @@ class ModelTesterMixin:
             seq_length = self.model_tester.seq_length
             self.assertListEqual(
-                list(hidden_states[0].shape[-2:]), [seq_length, self.model_tester.hidden_size],
+                list(hidden_states[0].shape[-2:]),
+                [seq_length, self.model_tester.hidden_size],
             )
         config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()

@@ -550,7 +565,10 @@ class ModelTesterMixin:
             check_hidden_states_output(inputs_dict, config, model_class)
     def test_feed_forward_chunking(self):
-        (original_config, inputs_dict,) = self.model_tester.prepare_config_and_inputs_for_common()
+        (
+            original_config,
+            inputs_dict,
+        ) = self.model_tester.prepare_config_and_inputs_for_common()
         for model_class in self.all_model_classes:
             torch.manual_seed(0)
             config = copy.deepcopy(original_config)

@@ -570,7 +588,10 @@ class ModelTesterMixin:
             self.assertTrue(torch.allclose(hidden_states_no_chunk, hidden_states_with_chunk, atol=1e-3))
     def test_resize_tokens_embeddings(self):
-        (original_config, inputs_dict,) = self.model_tester.prepare_config_and_inputs_for_common()
+        (
+            original_config,
+            inputs_dict,
+        ) = self.model_tester.prepare_config_and_inputs_for_common()
         if not self.test_resize_embeddings:
             return

@@ -844,7 +865,14 @@ class ModelTesterMixin:
             model.generate(input_ids, do_sample=False, num_return_sequences=3, num_beams=2)
             # num_return_sequences > 1, sample
-            self._check_generated_ids(model.generate(input_ids, do_sample=True, num_beams=2, num_return_sequences=2,))
+            self._check_generated_ids(
+                model.generate(
+                    input_ids,
+                    do_sample=True,
+                    num_beams=2,
+                    num_return_sequences=2,
+                )
+            )
             # num_return_sequences > 1, greedy
             self._check_generated_ids(model.generate(input_ids, do_sample=False, num_beams=2, num_return_sequences=2))
tests/test_modeling_ctrl.py (+2, -1)

@@ -30,7 +30,8 @@ if is_torch_available():
 class CTRLModelTester:
     def __init__(
-        self, parent,
+        self,
+        parent,
     ):
         self.parent = parent
         self.batch_size = 14
tests/test_modeling_distilbert.py (+3, -1)

@@ -179,7 +179,9 @@ if is_torch_available():
         multiple_choice_inputs_ids = input_ids.unsqueeze(1).expand(-1, self.num_choices, -1).contiguous()
         multiple_choice_input_mask = input_mask.unsqueeze(1).expand(-1, self.num_choices, -1).contiguous()
         result = model(
-            multiple_choice_inputs_ids, attention_mask=multiple_choice_input_mask, labels=choice_labels,
+            multiple_choice_inputs_ids,
+            attention_mask=multiple_choice_input_mask,
+            labels=choice_labels,
         )
         self.parent.assertEqual(result.logits.shape, (self.batch_size, self.num_choices))
tests/test_modeling_dpr.py (+13, -2)

@@ -149,7 +149,10 @@ class DPRModelTester:
         model = DPRReader(config=config)
         model.to(torch_device)
         model.eval()
-        result = model(input_ids, attention_mask=input_mask,)
+        result = model(
+            input_ids,
+            attention_mask=input_mask,
+        )
         self.parent.assertEqual(result.start_logits.shape, (self.batch_size, self.seq_length))
         self.parent.assertEqual(result.end_logits.shape, (self.batch_size, self.seq_length))

@@ -173,7 +176,15 @@ class DPRModelTester:
 @require_torch
 class DPRModelTest(ModelTesterMixin, unittest.TestCase):
-    all_model_classes = (DPRContextEncoder, DPRQuestionEncoder, DPRReader,) if is_torch_available() else ()
+    all_model_classes = (
+        (
+            DPRContextEncoder,
+            DPRQuestionEncoder,
+            DPRReader,
+        )
+        if is_torch_available()
+        else ()
+    )
     test_resize_embeddings = False
     test_missing_keys = False  # why?
tests/test_modeling_electra.py (+2, -1)

@@ -39,7 +39,8 @@ if is_torch_available():
 class ElectraModelTester:
     def __init__(
-        self, parent,
+        self,
+        parent,
     ):
         self.parent = parent
         self.batch_size = 13
tests/test_modeling_encoder_decoder.py (+8, -2)

@@ -391,7 +391,11 @@ class EncoderDecoderMixin:
         decoder_input_ids = ids_tensor([13, 1], model_2.config.encoder.vocab_size)
         attention_mask = ids_tensor([13, 5], vocab_size=2)
         with torch.no_grad():
-            outputs = model_2(input_ids=input_ids, decoder_input_ids=decoder_input_ids, attention_mask=attention_mask,)
+            outputs = model_2(
+                input_ids=input_ids,
+                decoder_input_ids=decoder_input_ids,
+                attention_mask=attention_mask,
+            )
             out_2 = outputs[0].cpu().numpy()
             out_2[np.isnan(out_2)] = 0

@@ -401,7 +405,9 @@ class EncoderDecoderMixin:
             model_1.to(torch_device)
             after_outputs = model_1(
-                input_ids=input_ids, decoder_input_ids=decoder_input_ids, attention_mask=attention_mask,
+                input_ids=input_ids,
+                decoder_input_ids=decoder_input_ids,
+                attention_mask=attention_mask,
             )
             out_1 = after_outputs[0].cpu().numpy()
             out_1[np.isnan(out_1)] = 0
tests/test_modeling_flaubert.py (+2, -1)

@@ -39,7 +39,8 @@ if is_torch_available():
 class FlaubertModelTester(object):
     def __init__(
-        self, parent,
+        self,
+        parent,
     ):
         self.parent = parent
         self.batch_size = 13
tests/test_modeling_gpt2.py (+2, -1)

@@ -244,7 +244,8 @@ class GPT2ModelTester:
         # append to next input_ids and attn_mask
         next_input_ids = torch.cat([input_ids, next_tokens], dim=-1)
         attn_mask = torch.cat(
-            [attn_mask, torch.ones((attn_mask.shape[0], 1), dtype=torch.long, device=torch_device)], dim=1,
+            [attn_mask, torch.ones((attn_mask.shape[0], 1), dtype=torch.long, device=torch_device)],
+            dim=1,
         )
         # get two different outputs