chenpangpang / transformers · Commits

Commit a75c64d8, authored Aug 26, 2020 by Lysandre

    Black 20 release

Parent: e78c1103
Changes: 191
Showing 20 changed files with 197 additions and 93 deletions.
src/transformers/tokenization_utils_fast.py            +4   -4
src/transformers/tokenization_xlm.py                    +6   -1
src/transformers/training_args.py                       +4   -2
src/transformers/training_args_tf.py                    +2   -1
templates/adding_a_new_example_script/utils_xxx.py      +3   -3
templates/adding_a_new_model/configuration_xxx.py       +37  -37
templates/adding_a_new_model/modeling_tf_xxx.py         +23  -8
templates/adding_a_new_model/modeling_xxx.py            +18  -10
tests/test_modeling_albert.py                           +2   -1
tests/test_modeling_bart.py                             +22  -6
tests/test_modeling_bert.py                             +4   -1
tests/test_modeling_camembert.py                        +3   -1
tests/test_modeling_common.py                           +37  -9
tests/test_modeling_ctrl.py                             +2   -1
tests/test_modeling_distilbert.py                       +3   -1
tests/test_modeling_dpr.py                              +13  -2
tests/test_modeling_electra.py                          +2   -1
tests/test_modeling_encoder_decoder.py                  +8   -2
tests/test_modeling_flaubert.py                         +2   -1
tests/test_modeling_gpt2.py                             +2   -1
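Most of the hunks below appear to be mechanical output of the Black 20.8 code style that this commit's title refers to, in particular its "magic trailing comma" rule: a call or literal that already ends in a trailing comma is exploded onto one line per element. The following standalone sketch is illustrative only and not part of the diff; the `configure` helper and its arguments are made up.

def configure(model, dropout, layers):
    # Hypothetical helper, used only for this illustration.
    return {"model": model, "dropout": dropout, "layers": layers}

# Before: the call fits on one line but ends with a trailing comma.
before = configure(model="xxx", dropout=0.1, layers=12,)

# After reformatting with Black >= 20.8b0, the trailing comma forces one
# argument per line, which is the pattern repeated throughout the hunks below.
after = configure(
    model="xxx",
    dropout=0.1,
    layers=12,
)

assert before == after  # only the layout changes, never the behaviour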
src/transformers/tokenization_utils_fast.py

@@ -139,7 +139,7 @@ class PreTrainedTokenizerFast(PreTrainedTokenizerBase):
         return_length: bool = False,
         verbose: bool = True,
     ) -> Dict[str, Any]:
-        """ Convert the encoding representation (from low-level HuggingFace tokenizer output) to a python Dict.
+        """Convert the encoding representation (from low-level HuggingFace tokenizer output) to a python Dict.
         Overflowing tokens are converted to additional examples (like batches) so the output values of
         the dict are lists (overflows) of lists (tokens).
src/transformers/tokenization_xlm.py

@@ -902,7 +902,12 @@ class XLMTokenizer(PreTrainedTokenizer):
                     "You should not supply a second sequence if the provided sequence of "
                     "ids is already formated with special tokens for the model."
                 )
-            return list(map(lambda x: 1 if x in [self.sep_token_id, self.cls_token_id] else 0, token_ids_0,))
+            return list(
+                map(
+                    lambda x: 1 if x in [self.sep_token_id, self.cls_token_id] else 0,
+                    token_ids_0,
+                )
+            )

         if token_ids_1 is not None:
             return [1] + ([0] * len(token_ids_0)) + [1] + ([0] * len(token_ids_1)) + [1]
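For context on the expression re-wrapped above (a layout-only change), the return statement builds a 0/1 mask marking special-token positions. Below is a minimal standalone sketch of the same expression, using made-up ids rather than XLM's real vocabulary values.

# Hypothetical ids: assume 0 is the CLS id and 1 is the SEP id (not XLM's real values).
cls_token_id = 0
sep_token_id = 1

# An already-formatted sequence: <cls> token token token <sep>
token_ids_0 = [0, 57, 98, 23, 1]

mask = list(map(lambda x: 1 if x in [sep_token_id, cls_token_id] else 0, token_ids_0))
print(mask)  # [1, 0, 0, 0, 1] -> 1 wherever a special token sits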
src/transformers/training_args.py

@@ -141,10 +141,12 @@ class TrainingArguments:
     do_eval: bool = field(default=False, metadata={"help": "Whether to run eval on the dev set."})
     do_predict: bool = field(default=False, metadata={"help": "Whether to run predictions on the test set."})
     evaluate_during_training: bool = field(
-        default=False, metadata={"help": "Run evaluation during training at each logging step."},
+        default=False,
+        metadata={"help": "Run evaluation during training at each logging step."},
     )
     prediction_loss_only: bool = field(
-        default=False, metadata={"help": "When performing evaluation and predictions, only returns the loss."},
+        default=False,
+        metadata={"help": "When performing evaluation and predictions, only returns the loss."},
     )
     per_device_train_batch_size: int = field(
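The fields re-wrapped above all follow the `dataclasses.field(default=..., metadata={"help": ...})` pattern that `TrainingArguments` uses for its options; the `help` metadata is typically surfaced later as command-line help text. A minimal self-contained sketch of that pattern (the `ExampleArguments` class is hypothetical, not the real `TrainingArguments`):

from dataclasses import dataclass, field, fields


@dataclass
class ExampleArguments:
    do_eval: bool = field(default=False, metadata={"help": "Whether to run eval on the dev set."})
    evaluate_during_training: bool = field(
        default=False,
        metadata={"help": "Run evaluation during training at each logging step."},
    )


args = ExampleArguments()
for f in fields(args):
    # The metadata dict travels with each field and can be read back later,
    # e.g. to build --flag help strings.
    print(f.name, "->", f.metadata["help"])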
src/transformers/training_args_tf.py

@@ -100,7 +100,8 @@ class TFTrainingArguments(TrainingArguments):
     """

     tpu_name: str = field(
-        default=None, metadata={"help": "Name of TPU"},
+        default=None,
+        metadata={"help": "Name of TPU"},
     )

     @cached_property
templates/adding_a_new_example_script/utils_xxx.py

@@ -703,7 +703,7 @@ def write_predictions_extended(
     tokenizer,
     verbose_logging,
 ):
-    """ XLNet write prediction logic (more complex than Bert's).
+    """XLNet write prediction logic (more complex than Bert's).
     Write final predictions to the json file and log-odds of null if needed.
     Requires utils_squad_evaluate.py
templates/adding_a_new_model/configuration_xxx.py (diff not expanded in this view)
templates/adding_a_new_model/modeling_tf_xxx.py

@@ -223,7 +223,10 @@ class TFXxxMainLayer(tf.keras.layers.Layer):
         pooled_output = self.pooler(sequence_output)

         if not return_dict:
-            return (sequence_output, pooled_output,) + encoder_outputs[1:]
+            return (
+                sequence_output,
+                pooled_output,
+            ) + encoder_outputs[1:]

         return TFBaseModelOutputWithPooling(
             last_hidden_state=sequence_output,
@@ -241,7 +244,7 @@ class TFXxxMainLayer(tf.keras.layers.Layer):
 # pointers for your model.
 ####################################################
 class TFXxxPreTrainedModel(TFPreTrainedModel):
-    """ An abstract class to handle weights initialization and
+    """An abstract class to handle weights initialization and
     a simple interface for downloading and loading pretrained models.
     """
@@ -422,7 +425,10 @@ class TFXxxForMaskedLM(TFXxxPreTrainedModel, TFMaskedLanguageModelingLoss):
             return ((loss,) + output) if loss is not None else output

         return TFMaskedLMOutput(
-            loss=loss, logits=prediction_scores, hidden_states=outputs.hidden_states, attentions=outputs.attentions,
+            loss=loss,
+            logits=prediction_scores,
+            hidden_states=outputs.hidden_states,
+            attentions=outputs.attentions,
         )
@@ -503,7 +509,10 @@ class TFXxxForSequenceClassification(TFXxxPreTrainedModel, TFSequenceClassificat
             return ((loss,) + output) if loss is not None else output

         return TFSequenceClassifierOutput(
-            loss=loss, logits=logits, hidden_states=outputs.hidden_states, attentions=outputs.attentions,
+            loss=loss,
+            logits=logits,
+            hidden_states=outputs.hidden_states,
+            attentions=outputs.attentions,
         )
@@ -524,7 +533,7 @@ class TFXxxForMultipleChoice(TFXxxPreTrainedModel, TFMultipleChoiceLoss):
     @property
     def dummy_inputs(self):
-        """ Dummy inputs to build the network.
+        """Dummy inputs to build the network.
         Returns:
             tf.Tensor with dummy inputs
@@ -631,7 +640,10 @@ class TFXxxForMultipleChoice(TFXxxPreTrainedModel, TFMultipleChoiceLoss):
             return ((loss,) + output) if loss is not None else output

         return TFMultipleChoiceModelOutput(
-            loss=loss, logits=reshaped_logits, hidden_states=outputs.hidden_states, attentions=outputs.attentions,
+            loss=loss,
+            logits=reshaped_logits,
+            hidden_states=outputs.hidden_states,
+            attentions=outputs.attentions,
         )
@@ -710,7 +722,10 @@ class TFXxxForTokenClassification(TFXxxPreTrainedModel, TFTokenClassificationLos
             return ((loss,) + output) if loss is not None else output

         return TFTokenClassifierOutput(
-            loss=loss, logits=logits, hidden_states=outputs.hidden_states, attentions=outputs.attentions,
+            loss=loss,
+            logits=logits,
+            hidden_states=outputs.hidden_states,
+            attentions=outputs.attentions,
         )
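The template hunks above all touch the same return convention: with `return_dict=False` the model returns a plain tuple, and `((loss,) + output) if loss is not None else output` prepends the loss only when one was computed; with `return_dict=True` a named output object is returned instead. A toy sketch of that convention follows, using dummy values and a plain dict standing in for the real `TFMaskedLMOutput`-style classes.

from typing import Optional


def head_outputs(logits: float, loss: Optional[float], return_dict: bool):
    hidden_states = ("h0", "h1")  # stand-ins for per-layer tensors
    if not return_dict:
        output = (logits, hidden_states)
        return ((loss,) + output) if loss is not None else output
    # The real templates build a ModelOutput dataclass here; a dict stands in for it.
    return {"loss": loss, "logits": logits, "hidden_states": hidden_states}


print(head_outputs(0.7, loss=None, return_dict=False))  # (0.7, ('h0', 'h1'))
print(head_outputs(0.7, loss=1.2, return_dict=False))   # (1.2, 0.7, ('h0', 'h1'))
print(head_outputs(0.7, loss=1.2, return_dict=True))    # named fields instead of positions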
templates/adding_a_new_model/modeling_xxx.py

@@ -59,8 +59,7 @@ XXX_PRETRAINED_MODEL_ARCHIVE_LIST = [
 # More details: https://medium.com/huggingface/from-tensorflow-to-pytorch-265f40ef2a28
 ####################################################
 def load_tf_weights_in_xxx(model, config, tf_checkpoint_path):
-    """ Load tf checkpoints in a pytorch model.
-    """
+    """Load tf checkpoints in a pytorch model."""
     try:
         import re
@@ -189,7 +188,7 @@ XxxPooler = nn.Module
 class XxxPreTrainedModel(PreTrainedModel):
-    """ An abstract class to handle weights initialization and
+    """An abstract class to handle weights initialization and
     a simple interface for downloading and loading pretrained models.
     """
@@ -290,7 +289,7 @@ class XxxModel(XxxPreTrainedModel):
         self.embeddings.word_embeddings = new_embeddings

     def _prune_heads(self, heads_to_prune):
-        """ Prunes heads of the model.
+        """Prunes heads of the model.
         heads_to_prune: dict of {layer_num: list of heads to prune in this layer}
         See base class PreTrainedModel
         """
@@ -517,7 +516,10 @@ class XxxForSequenceClassification(XxxPreTrainedModel):
             return ((loss,) + output) if loss is not None else output

         return SequenceClassifierOutput(
-            loss=loss, logits=logits, hidden_states=outputs.hidden_states, attentions=outputs.attentions,
+            loss=loss,
+            logits=logits,
+            hidden_states=outputs.hidden_states,
+            attentions=outputs.attentions,
         )
@@ -603,7 +605,10 @@ class XxxForMultipleChoice(XxxPreTrainedModel):
             return ((loss,) + output) if loss is not None else output

         return MultipleChoiceModelOutput(
-            loss=loss, logits=reshaped_logits, hidden_states=outputs.hidden_states, attentions=outputs.attentions,
+            loss=loss,
+            logits=reshaped_logits,
+            hidden_states=outputs.hidden_states,
+            attentions=outputs.attentions,
         )
@@ -686,7 +691,10 @@ class XxxForTokenClassification(XxxPreTrainedModel):
             return ((loss,) + output) if loss is not None else output

         return TokenClassifierOutput(
-            loss=loss, logits=logits, hidden_states=outputs.hidden_states, attentions=outputs.attentions,
+            loss=loss,
+            logits=logits,
+            hidden_states=outputs.hidden_states,
+            attentions=outputs.attentions,
         )
tests/test_modeling_albert.py

@@ -39,7 +39,8 @@ if is_torch_available():
 class AlbertModelTester:
     def __init__(
-        self, parent,
+        self,
+        parent,
     ):
         self.parent = parent
         self.batch_size = 13
tests/test_modeling_bart.py

@@ -54,7 +54,8 @@ PGE_ARTICLE = """ PG&E stated it scheduled the blackouts in response to forecast
 @require_torch
 class ModelTester:
     def __init__(
-        self, parent,
+        self,
+        parent,
     ):
         self.parent = parent
         self.batch_size = 13
@@ -76,7 +77,9 @@ class ModelTester:
         torch.manual_seed(0)

     def prepare_config_and_inputs_for_common(self):
-        input_ids = ids_tensor([self.batch_size, self.seq_length], self.vocab_size).clamp(3,)
+        input_ids = ids_tensor([self.batch_size, self.seq_length], self.vocab_size).clamp(
+            3,
+        )
         input_ids[:, -1] = 2  # Eos Token

         config = BartConfig(
@@ -100,7 +103,9 @@ class ModelTester:
 def prepare_bart_inputs_dict(
-    config, input_ids, attention_mask=None,
+    config,
+    input_ids,
+    attention_mask=None,
 ):
     if attention_mask is None:
         attention_mask = input_ids.ne(config.pad_token_id)
@@ -261,7 +266,11 @@ class BartHeadTests(unittest.TestCase):
         sequence_labels = ids_tensor([batch_size], 2).to(torch_device)
         model = BartForQuestionAnswering(config)
         model.to(torch_device)
-        outputs = model(input_ids=input_ids, start_positions=sequence_labels, end_positions=sequence_labels,)
+        outputs = model(
+            input_ids=input_ids,
+            start_positions=sequence_labels,
+            end_positions=sequence_labels,
+        )
         self.assertEqual(outputs["start_logits"].shape, input_ids.shape)
         self.assertEqual(outputs["end_logits"].shape, input_ids.shape)
@@ -491,7 +500,11 @@ class BartModelIntegrationTests(unittest.TestCase):
         EXPECTED_SUMMARY = "California's largest power company has begun shutting off electricity to thousands of customers in the state."
         dct = tok.batch_encode_plus(
-            [PGE_ARTICLE], max_length=1024, padding="max_length", truncation=True, return_tensors="pt",
+            [PGE_ARTICLE],
+            max_length=1024,
+            padding="max_length",
+            truncation=True,
+            return_tensors="pt",
         ).to(torch_device)

         hypotheses_batch = model.generate(
@@ -506,7 +519,10 @@ class BartModelIntegrationTests(unittest.TestCase):
             decoder_start_token_id=model.config.eos_token_id,
         )
-        decoded = tok.batch_decode(hypotheses_batch, skip_special_tokens=True,)
+        decoded = tok.batch_decode(
+            hypotheses_batch,
+            skip_special_tokens=True,
+        )
         self.assertEqual(EXPECTED_SUMMARY, decoded[0])

     def test_xsum_config_generation_params(self):
tests/test_modeling_bert.py

@@ -264,7 +264,10 @@ class BertModelTester:
         model.to(torch_device)
         model.eval()
         result = model(
-            input_ids, attention_mask=input_mask, token_type_ids=token_type_ids, next_sentence_label=sequence_labels,
+            input_ids,
+            attention_mask=input_mask,
+            token_type_ids=token_type_ids,
+            next_sentence_label=sequence_labels,
         )
         self.parent.assertEqual(result.logits.shape, (self.batch_size, 2))
tests/test_modeling_camembert.py

@@ -33,7 +33,9 @@ class CamembertModelIntegrationTest(unittest.TestCase):
         model.to(torch_device)

         input_ids = torch.tensor(
-            [[5, 121, 11, 660, 16, 730, 25543, 110, 83, 6]], device=torch_device, dtype=torch.long,
+            [[5, 121, 11, 660, 16, 730, 25543, 110, 83, 6]],
+            device=torch_device,
+            dtype=torch.long,
         )  # J'aime le camembert !
         output = model(input_ids)["last_hidden_state"]
         expected_shape = torch.Size((1, 10, 768))
tests/test_modeling_common.py

@@ -330,7 +330,9 @@ class ModelTesterMixin:
         # Prepare head_mask
         # Set require_grad after having prepared the tensor to avoid error (leaf variable has been moved into the graph interior)
         head_mask = torch.ones(
-            self.model_tester.num_hidden_layers, self.model_tester.num_attention_heads, device=torch_device,
+            self.model_tester.num_hidden_layers,
+            self.model_tester.num_attention_heads,
+            device=torch_device,
         )
         head_mask[0, 0] = 0
         head_mask[-1, :-1] = 0
@@ -370,7 +372,10 @@ class ModelTesterMixin:
             return

         for model_class in self.all_model_classes:
-            (config, inputs_dict,) = self.model_tester.prepare_config_and_inputs_for_common()
+            (
+                config,
+                inputs_dict,
+            ) = self.model_tester.prepare_config_and_inputs_for_common()
             if "head_mask" in inputs_dict:
                 del inputs_dict["head_mask"]
@@ -399,7 +404,10 @@ class ModelTesterMixin:
             return

         for model_class in self.all_model_classes:
-            (config, inputs_dict,) = self.model_tester.prepare_config_and_inputs_for_common()
+            (
+                config,
+                inputs_dict,
+            ) = self.model_tester.prepare_config_and_inputs_for_common()
             if "head_mask" in inputs_dict:
                 del inputs_dict["head_mask"]
@@ -432,7 +440,10 @@ class ModelTesterMixin:
             return

         for model_class in self.all_model_classes:
-            (config, inputs_dict,) = self.model_tester.prepare_config_and_inputs_for_common()
+            (
+                config,
+                inputs_dict,
+            ) = self.model_tester.prepare_config_and_inputs_for_common()
             if "head_mask" in inputs_dict:
                 del inputs_dict["head_mask"]
@@ -463,7 +474,10 @@ class ModelTesterMixin:
             return

         for model_class in self.all_model_classes:
-            (config, inputs_dict,) = self.model_tester.prepare_config_and_inputs_for_common()
+            (
+                config,
+                inputs_dict,
+            ) = self.model_tester.prepare_config_and_inputs_for_common()
             if "head_mask" in inputs_dict:
                 del inputs_dict["head_mask"]
@@ -534,7 +548,8 @@ class ModelTesterMixin:
                 seq_length = self.model_tester.seq_length

             self.assertListEqual(
-                list(hidden_states[0].shape[-2:]), [seq_length, self.model_tester.hidden_size],
+                list(hidden_states[0].shape[-2:]),
+                [seq_length, self.model_tester.hidden_size],
             )

         config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
@@ -550,7 +565,10 @@ class ModelTesterMixin:
             check_hidden_states_output(inputs_dict, config, model_class)

     def test_feed_forward_chunking(self):
-        (original_config, inputs_dict,) = self.model_tester.prepare_config_and_inputs_for_common()
+        (
+            original_config,
+            inputs_dict,
+        ) = self.model_tester.prepare_config_and_inputs_for_common()
         for model_class in self.all_model_classes:
             torch.manual_seed(0)
             config = copy.deepcopy(original_config)
@@ -570,7 +588,10 @@ class ModelTesterMixin:
             self.assertTrue(torch.allclose(hidden_states_no_chunk, hidden_states_with_chunk, atol=1e-3))

     def test_resize_tokens_embeddings(self):
-        (original_config, inputs_dict,) = self.model_tester.prepare_config_and_inputs_for_common()
+        (
+            original_config,
+            inputs_dict,
+        ) = self.model_tester.prepare_config_and_inputs_for_common()
         if not self.test_resize_embeddings:
             return
@@ -844,7 +865,14 @@ class ModelTesterMixin:
             model.generate(input_ids, do_sample=False, num_return_sequences=3, num_beams=2)

             # num_return_sequences > 1, sample
-            self._check_generated_ids(model.generate(input_ids, do_sample=True, num_beams=2, num_return_sequences=2,))
+            self._check_generated_ids(
+                model.generate(
+                    input_ids,
+                    do_sample=True,
+                    num_beams=2,
+                    num_return_sequences=2,
+                )
+            )

             # num_return_sequences > 1, greedy
             self._check_generated_ids(model.generate(input_ids, do_sample=False, num_beams=2, num_return_sequences=2))
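The first hunk above builds a head_mask of shape (num_hidden_layers, num_attention_heads) and zeroes out individual heads; in these models a 1 keeps a head active and a 0 masks it. A small standalone sketch with hypothetical dimensions (the tester's real sizes and the device=torch_device argument are omitted):

import torch

num_hidden_layers = 2
num_attention_heads = 4

# One entry per (layer, head); 1.0 keeps the head, 0.0 masks it out.
head_mask = torch.ones(
    num_hidden_layers,
    num_attention_heads,
)
head_mask[0, 0] = 0      # disable the first head of the first layer
head_mask[-1, :-1] = 0   # in the last layer, keep only the final head

print(head_mask)
# tensor([[0., 1., 1., 1.],
#         [0., 0., 0., 1.]])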
tests/test_modeling_ctrl.py

@@ -30,7 +30,8 @@ if is_torch_available():
 class CTRLModelTester:
     def __init__(
-        self, parent,
+        self,
+        parent,
     ):
         self.parent = parent
         self.batch_size = 14
tests/test_modeling_distilbert.py

@@ -179,7 +179,9 @@ if is_torch_available():
             multiple_choice_inputs_ids = input_ids.unsqueeze(1).expand(-1, self.num_choices, -1).contiguous()
             multiple_choice_input_mask = input_mask.unsqueeze(1).expand(-1, self.num_choices, -1).contiguous()
             result = model(
-                multiple_choice_inputs_ids, attention_mask=multiple_choice_input_mask, labels=choice_labels,
+                multiple_choice_inputs_ids,
+                attention_mask=multiple_choice_input_mask,
+                labels=choice_labels,
             )
             self.parent.assertEqual(result.logits.shape, (self.batch_size, self.num_choices))
tests/test_modeling_dpr.py

@@ -149,7 +149,10 @@ class DPRModelTester:
         model = DPRReader(config=config)
         model.to(torch_device)
         model.eval()
-        result = model(input_ids, attention_mask=input_mask,)
+        result = model(
+            input_ids,
+            attention_mask=input_mask,
+        )
         self.parent.assertEqual(result.start_logits.shape, (self.batch_size, self.seq_length))
         self.parent.assertEqual(result.end_logits.shape, (self.batch_size, self.seq_length))
@@ -173,7 +176,15 @@ class DPRModelTester:
 @require_torch
 class DPRModelTest(ModelTesterMixin, unittest.TestCase):
-    all_model_classes = (DPRContextEncoder, DPRQuestionEncoder, DPRReader,) if is_torch_available() else ()
+    all_model_classes = (
+        (
+            DPRContextEncoder,
+            DPRQuestionEncoder,
+            DPRReader,
+        )
+        if is_torch_available()
+        else ()
+    )

     test_resize_embeddings = False
     test_missing_keys = False  # why?
tests/test_modeling_electra.py

@@ -39,7 +39,8 @@ if is_torch_available():
 class ElectraModelTester:
     def __init__(
-        self, parent,
+        self,
+        parent,
     ):
         self.parent = parent
         self.batch_size = 13
tests/test_modeling_encoder_decoder.py

@@ -391,7 +391,11 @@ class EncoderDecoderMixin:
         decoder_input_ids = ids_tensor([13, 1], model_2.config.encoder.vocab_size)
         attention_mask = ids_tensor([13, 5], vocab_size=2)
         with torch.no_grad():
-            outputs = model_2(input_ids=input_ids, decoder_input_ids=decoder_input_ids, attention_mask=attention_mask,)
+            outputs = model_2(
+                input_ids=input_ids,
+                decoder_input_ids=decoder_input_ids,
+                attention_mask=attention_mask,
+            )
             out_2 = outputs[0].cpu().numpy()
             out_2[np.isnan(out_2)] = 0
@@ -401,7 +405,9 @@ class EncoderDecoderMixin:
             model_1.to(torch_device)
             after_outputs = model_1(
-                input_ids=input_ids, decoder_input_ids=decoder_input_ids, attention_mask=attention_mask,
+                input_ids=input_ids,
+                decoder_input_ids=decoder_input_ids,
+                attention_mask=attention_mask,
             )
             out_1 = after_outputs[0].cpu().numpy()
             out_1[np.isnan(out_1)] = 0
tests/test_modeling_flaubert.py

@@ -39,7 +39,8 @@ if is_torch_available():
 class FlaubertModelTester(object):
     def __init__(
-        self, parent,
+        self,
+        parent,
     ):
         self.parent = parent
         self.batch_size = 13
tests/test_modeling_gpt2.py

@@ -244,7 +244,8 @@ class GPT2ModelTester:
             # append to next input_ids and attn_mask
             next_input_ids = torch.cat([input_ids, next_tokens], dim=-1)
             attn_mask = torch.cat(
-                [attn_mask, torch.ones((attn_mask.shape[0], 1), dtype=torch.long, device=torch_device)], dim=1,
+                [attn_mask, torch.ones((attn_mask.shape[0], 1), dtype=torch.long, device=torch_device)],
+                dim=1,
             )

             # get two different outputs